diff --git a/crates/core_arch/src/x86/abm.rs b/crates/core_arch/src/x86/abm.rs index 5519c83e12..e6d5517600 100644 --- a/crates/core_arch/src/x86/abm.rs +++ b/crates/core_arch/src/x86/abm.rs @@ -29,7 +29,7 @@ use stdarch_test::assert_instr; #[target_feature(enable = "lzcnt")] #[cfg_attr(test, assert_instr(lzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _lzcnt_u32(x: u32) -> u32 { +pub fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } @@ -40,7 +40,7 @@ pub unsafe fn _lzcnt_u32(x: u32) -> u32 { #[target_feature(enable = "popcnt")] #[cfg_attr(test, assert_instr(popcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _popcnt32(x: i32) -> i32 { +pub fn _popcnt32(x: i32) -> i32 { x.count_ones() as i32 } diff --git a/crates/core_arch/src/x86/aes.rs b/crates/core_arch/src/x86/aes.rs index 789081cdb5..7db743b2cc 100644 --- a/crates/core_arch/src/x86/aes.rs +++ b/crates/core_arch/src/x86/aes.rs @@ -35,8 +35,8 @@ unsafe extern "C" { #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesdec))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { - aesdec(a, round_key) +pub fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesdec(a, round_key) } } /// Performs the last round of an AES decryption flow on data (state) in `a`. @@ -46,8 +46,8 @@ pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesdeclast))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { - aesdeclast(a, round_key) +pub fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesdeclast(a, round_key) } } /// Performs one round of an AES encryption flow on data (state) in `a`. @@ -57,8 +57,8 @@ pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesenc))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { - aesenc(a, round_key) +pub fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesenc(a, round_key) } } /// Performs the last round of an AES encryption flow on data (state) in `a`. @@ -68,8 +68,8 @@ pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesenclast))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { - aesenclast(a, round_key) +pub fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { + unsafe { aesenclast(a, round_key) } } /// Performs the `InvMixColumns` transformation on `a`. @@ -79,8 +79,8 @@ pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesimc))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i { - aesimc(a) +pub fn _mm_aesimc_si128(a: __m128i) -> __m128i { + unsafe { aesimc(a) } } /// Assist in expanding the AES cipher key. 
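The pattern above (dropping `unsafe` from the public signature while keeping the intrinsic call inside an `unsafe` block) changes what call sites need. A minimal sketch of a caller, not part of this diff, assuming an x86_64 target and a toolchain where safe `#[target_feature]` functions are allowed; the helper name is made up:

use std::arch::x86_64::_lzcnt_u32;

// With `_lzcnt_u32` now a safe fn, a caller that itself enables "lzcnt"
// can call it without any `unsafe` block.
#[target_feature(enable = "lzcnt")]
fn leading_zeros(x: u32) -> u32 {
    _lzcnt_u32(x)
}

fn main() {
    if is_x86_feature_detected!("lzcnt") {
        // Calling the `#[target_feature]` function from ordinary code is still
        // `unsafe`: the caller asserts that the CPU really supports LZCNT.
        assert_eq!(unsafe { leading_zeros(1) }, 31);
    }
}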
@@ -95,9 +95,9 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(aeskeygenassist, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i) -> __m128i { +pub fn _mm_aeskeygenassist_si128(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - aeskeygenassist(a, IMM8 as u8) + unsafe { aeskeygenassist(a, IMM8 as u8) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index f78ca6d839..5a8ed54fae 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -30,8 +30,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { - simd_add(a, b) +pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_add(a, b) } } /// Adds packed single-precision (32-bit) floating-point elements in `a` and @@ -42,8 +42,8 @@ pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { - simd_add(a, b) +pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_add(a, b) } } /// Computes the bitwise AND of a packed double-precision (64-bit) @@ -55,10 +55,12 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { // See https://github.com/rust-lang/stdarch/issues/71 #[cfg_attr(test, assert_instr(vandp))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { - let a: u64x4 = transmute(a); - let b: u64x4 = transmute(b); - transmute(simd_and(a, b)) +pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_and(a, b)) + } } /// Computes the bitwise AND of packed single-precision (32-bit) floating-point @@ -69,10 +71,12 @@ pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { - let a: u32x8 = transmute(a); - let b: u32x8 = transmute(b); - transmute(simd_and(a, b)) +pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_and(a, b)) + } } /// Computes the bitwise OR packed double-precision (64-bit) floating-point @@ -84,10 +88,12 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { // See . 
#[cfg_attr(test, assert_instr(vorp))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { - let a: u64x4 = transmute(a); - let b: u64x4 = transmute(b); - transmute(simd_or(a, b)) +pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_or(a, b)) + } } /// Computes the bitwise OR packed single-precision (32-bit) floating-point @@ -98,10 +104,12 @@ pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { - let a: u32x8 = transmute(a); - let b: u32x8 = transmute(b); - transmute(simd_or(a, b)) +pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_or(a, b)) + } } /// Shuffles double-precision (64-bit) floating-point elements within 128-bit @@ -113,18 +121,20 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d { +pub fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m256d { static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - b, - [ - MASK as u32 & 0b1, - ((MASK as u32 >> 1) & 0b1) + 4, - ((MASK as u32 >> 2) & 0b1) + 2, - ((MASK as u32 >> 3) & 0b1) + 6, - ], - ) + unsafe { + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1) + 4, + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 6, + ], + ) + } } /// Shuffles single-precision (32-bit) floating-point elements in `a` within @@ -136,22 +146,24 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d) -> __m2 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 { +pub fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - b, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11) + 8, - ((MASK as u32 >> 6) & 0b11) + 8, - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 12, - ((MASK as u32 >> 6) & 0b11) + 12, - ], - ) + unsafe { + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ) + } } /// Computes the bitwise NOT of packed double-precision (64-bit) floating-point @@ -162,10 +174,12 @@ pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256) -> __m256 #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandnp))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { - let a: u64x4 = transmute(a); - let b: u64x4 = transmute(b); - transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b)) +pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), 
b)) + } } /// Computes the bitwise NOT of packed single-precision (32-bit) floating-point @@ -177,10 +191,12 @@ pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { - let a: u32x8 = transmute(a); - let b: u32x8 = transmute(b); - transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b)) +pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b)) + } } /// Compares packed double-precision (64-bit) floating-point elements @@ -191,8 +207,8 @@ pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { - vmaxpd(a, b) +pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { vmaxpd(a, b) } } /// Compares packed single-precision (32-bit) floating-point elements in `a` @@ -203,8 +219,8 @@ pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { - vmaxps(a, b) +pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { + unsafe { vmaxps(a, b) } } /// Compares packed double-precision (64-bit) floating-point elements @@ -215,8 +231,8 @@ pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { - vminpd(a, b) +pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { vminpd(a, b) } } /// Compares packed single-precision (32-bit) floating-point elements in `a` @@ -227,8 +243,8 @@ pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { - vminps(a, b) +pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { + unsafe { vminps(a, b) } } /// Multiplies packed double-precision (64-bit) floating-point elements @@ -239,8 +255,8 @@ pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { - simd_mul(a, b) +pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_mul(a, b) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` and @@ -251,8 +267,8 @@ pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { - simd_mul(a, b) +pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_mul(a, b) } } /// Alternatively adds and subtracts packed double-precision (64-bit) @@ -263,12 +279,14 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { 
#[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let add = simd_add(a, b); - let sub = simd_sub(a, b); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Alternatively adds and subtracts packed single-precision (32-bit) @@ -279,12 +297,14 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let add = simd_add(a, b); - let sub = simd_sub(a, b); - simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) +pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } } /// Subtracts packed double-precision (64-bit) floating-point elements in `b` @@ -295,8 +315,8 @@ pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { - simd_sub(a, b) +pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_sub(a, b) } } /// Subtracts packed single-precision (32-bit) floating-point elements in `b` @@ -307,8 +327,8 @@ pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { - simd_sub(a, b) +pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_sub(a, b) } } /// Computes the division of each of the 8 packed 32-bit floating-point elements @@ -319,8 +339,8 @@ pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { - simd_div(a, b) +pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_div(a, b) } } /// Computes the division of each of the 4 packed 64-bit floating-point elements @@ -331,8 +351,8 @@ pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { - simd_div(a, b) +pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_div(a, b) } } /// Rounds packed double-precision (64-bit) floating point elements in `a` @@ -353,9 +373,9 @@ pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_round_pd(a: __m256d) -> __m256d { +pub fn _mm256_round_pd(a: __m256d) -> __m256d { 
static_assert_uimm_bits!(ROUNDING, 4); - roundpd256(a, ROUNDING) + unsafe { roundpd256(a, ROUNDING) } } /// Rounds packed double-precision (64-bit) floating point elements in `a` @@ -366,8 +386,8 @@ pub unsafe fn _mm256_round_pd(a: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { - simd_ceil(a) +pub fn _mm256_ceil_pd(a: __m256d) -> __m256d { + unsafe { simd_ceil(a) } } /// Rounds packed double-precision (64-bit) floating point elements in `a` @@ -378,8 +398,8 @@ pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { - simd_floor(a) +pub fn _mm256_floor_pd(a: __m256d) -> __m256d { + unsafe { simd_floor(a) } } /// Rounds packed single-precision (32-bit) floating point elements in `a` @@ -400,9 +420,9 @@ pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_round_ps(a: __m256) -> __m256 { +pub fn _mm256_round_ps(a: __m256) -> __m256 { static_assert_uimm_bits!(ROUNDING, 4); - roundps256(a, ROUNDING) + unsafe { roundps256(a, ROUNDING) } } /// Rounds packed single-precision (32-bit) floating point elements in `a` @@ -413,8 +433,8 @@ pub unsafe fn _mm256_round_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { - simd_ceil(a) +pub fn _mm256_ceil_ps(a: __m256) -> __m256 { + unsafe { simd_ceil(a) } } /// Rounds packed single-precision (32-bit) floating point elements in `a` @@ -425,8 +445,8 @@ pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { - simd_floor(a) +pub fn _mm256_floor_ps(a: __m256) -> __m256 { + unsafe { simd_floor(a) } } /// Returns the square root of packed single-precision (32-bit) floating point @@ -437,8 +457,8 @@ pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { - simd_fsqrt(a) +pub fn _mm256_sqrt_ps(a: __m256) -> __m256 { + unsafe { simd_fsqrt(a) } } /// Returns the square root of packed double-precision (64-bit) floating point @@ -449,8 +469,8 @@ pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { - simd_fsqrt(a) +pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d { + unsafe { simd_fsqrt(a) } } /// Blends packed double-precision (64-bit) floating-point elements from @@ -465,18 +485,20 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256d { +pub fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256d { 
static_assert_uimm_bits!(IMM4, 4); - simd_shuffle!( - a, - b, - [ - ((IMM4 as u32 >> 0) & 1) * 4 + 0, - ((IMM4 as u32 >> 1) & 1) * 4 + 1, - ((IMM4 as u32 >> 2) & 1) * 4 + 2, - ((IMM4 as u32 >> 3) & 1) * 4 + 3, - ], - ) + unsafe { + simd_shuffle!( + a, + b, + [ + ((IMM4 as u32 >> 0) & 1) * 4 + 0, + ((IMM4 as u32 >> 1) & 1) * 4 + 1, + ((IMM4 as u32 >> 2) & 1) * 4 + 2, + ((IMM4 as u32 >> 3) & 1) * 4 + 3, + ], + ) + } } /// Blends packed single-precision (32-bit) floating-point elements from @@ -488,22 +510,24 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d) -> __m256 #[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 { +pub fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); - simd_shuffle!( - a, - b, - [ - ((IMM8 as u32 >> 0) & 1) * 8 + 0, - ((IMM8 as u32 >> 1) & 1) * 8 + 1, - ((IMM8 as u32 >> 2) & 1) * 8 + 2, - ((IMM8 as u32 >> 3) & 1) * 8 + 3, - ((IMM8 as u32 >> 4) & 1) * 8 + 4, - ((IMM8 as u32 >> 5) & 1) * 8 + 5, - ((IMM8 as u32 >> 6) & 1) * 8 + 6, - ((IMM8 as u32 >> 7) & 1) * 8 + 7, - ], - ) + unsafe { + simd_shuffle!( + a, + b, + [ + ((IMM8 as u32 >> 0) & 1) * 8 + 0, + ((IMM8 as u32 >> 1) & 1) * 8 + 1, + ((IMM8 as u32 >> 2) & 1) * 8 + 2, + ((IMM8 as u32 >> 3) & 1) * 8 + 3, + ((IMM8 as u32 >> 4) & 1) * 8 + 4, + ((IMM8 as u32 >> 5) & 1) * 8 + 5, + ((IMM8 as u32 >> 6) & 1) * 8 + 6, + ((IMM8 as u32 >> 7) & 1) * 8 + 7, + ], + ) + } } /// Blends packed double-precision (64-bit) floating-point elements from @@ -514,9 +538,11 @@ pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO); - transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4())) +pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO); + transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4())) + } } /// Blends packed single-precision (32-bit) floating-point elements from @@ -527,9 +553,11 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO); - transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8())) +pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO); + transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8())) + } } /// Conditionally multiplies the packed single-precision (32-bit) floating-point @@ -543,9 +571,9 @@ pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256) -> __m256 { +pub fn _mm256_dp_ps(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); - vdpps(a, b, IMM8) + unsafe { vdpps(a, b, IMM8) } } /// Horizontal addition of adjacent pairs in the two packed 
vectors @@ -558,8 +586,8 @@ pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { - vhaddpd(a, b) +pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { vhaddpd(a, b) } } /// Horizontal addition of adjacent pairs in the two packed vectors @@ -573,8 +601,8 @@ pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { - vhaddps(a, b) +pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { + unsafe { vhaddps(a, b) } } /// Horizontal subtraction of adjacent pairs in the two packed vectors @@ -587,8 +615,8 @@ pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { - vhsubpd(a, b) +pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { vhsubpd(a, b) } } /// Horizontal subtraction of adjacent pairs in the two packed vectors @@ -602,8 +630,8 @@ pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { - vhsubps(a, b) +pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { + unsafe { vhsubps(a, b) } } /// Computes the bitwise XOR of packed double-precision (64-bit) floating-point @@ -614,10 +642,12 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorp))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { - let a: u64x4 = transmute(a); - let b: u64x4 = transmute(b); - transmute(simd_xor(a, b)) +pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + let a: u64x4 = transmute(a); + let b: u64x4 = transmute(b); + transmute(simd_xor(a, b)) + } } /// Computes the bitwise XOR of packed single-precision (32-bit) floating-point @@ -628,10 +658,12 @@ pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { - let a: u32x8 = transmute(a); - let b: u32x8 = transmute(b); - transmute(simd_xor(a, b)) +pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + let a: u32x8 = transmute(a); + let b: u32x8 = transmute(b); + transmute(simd_xor(a, b)) + } } /// Equal (ordered, non-signaling) @@ -741,9 +773,9 @@ pub const _CMP_TRUE_US: i32 = 0x1f; #[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_cmp_pd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM5, 5); - vcmppd(a, b, const { IMM5 as i8 }) + unsafe { vcmppd(a, b, const { IMM5 as i8 }) } } /// Compares packed double-precision (64-bit) floating-point @@ -756,9 +788,9 @@ pub unsafe fn _mm_cmp_pd(a: 
__m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d { +pub fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d { static_assert_uimm_bits!(IMM5, 5); - vcmppd256(a, b, IMM5 as u8) + unsafe { vcmppd256(a, b, IMM5 as u8) } } /// Compares packed single-precision (32-bit) floating-point @@ -771,9 +803,9 @@ pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d) -> __m256d #[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 { +pub fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM5, 5); - vcmpps(a, b, const { IMM5 as i8 }) + unsafe { vcmpps(a, b, const { IMM5 as i8 }) } } /// Compares packed single-precision (32-bit) floating-point @@ -786,9 +818,9 @@ pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 { +pub fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM5, 5); - vcmpps256(a, b, const { IMM5 as u8 }) + unsafe { vcmpps256(a, b, const { IMM5 as u8 }) } } /// Compares the lower double-precision (64-bit) floating-point element in @@ -803,9 +835,9 @@ pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM5, 5); - vcmpsd(a, b, IMM5 as i8) + unsafe { vcmpsd(a, b, IMM5 as i8) } } /// Compares the lower single-precision (32-bit) floating-point element in @@ -820,9 +852,9 @@ pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 { +pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM5, 5); - vcmpss(a, b, IMM5 as i8) + unsafe { vcmpss(a, b, IMM5 as i8) } } /// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) @@ -833,8 +865,8 @@ pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { - simd_cast(a.as_i32x4()) +pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { + unsafe { simd_cast(a.as_i32x4()) } } /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) @@ -845,8 +877,8 @@ pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { - simd_cast(a.as_i32x8()) +pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { + unsafe { simd_cast(a.as_i32x8()) } } /// Converts packed double-precision (64-bit)
floating-point elements in `a` @@ -857,8 +889,8 @@ pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { - simd_cast(a) +pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { + unsafe { simd_cast(a) } } /// Converts packed single-precision (32-bit) floating-point elements in `a` @@ -869,8 +901,8 @@ pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { - transmute(vcvtps2dq(a)) +pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i { + unsafe { transmute(vcvtps2dq(a)) } } /// Converts packed single-precision (32-bit) floating-point elements in `a` @@ -881,8 +913,8 @@ pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { - simd_cast(a) +pub fn _mm256_cvtps_pd(a: __m128) -> __m256d { + unsafe { simd_cast(a) } } /// Returns the first element of the input vector of `[4 x double]`. @@ -892,8 +924,8 @@ pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(movsd))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { - simd_extract!(a, 0) +pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 { + unsafe { simd_extract!(a, 0) } } /// Converts packed double-precision (64-bit) floating-point elements in `a` @@ -904,8 +936,8 @@ pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { - transmute(vcvttpd2dq(a)) +pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2dq(a)) } } /// Converts packed double-precision (64-bit) floating-point elements in `a` @@ -916,8 +948,8 @@ pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { - transmute(vcvtpd2dq(a)) +pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2dq(a)) } } /// Converts packed single-precision (32-bit) floating-point elements in `a` @@ -928,8 +960,8 @@ pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { - transmute(vcvttps2dq(a)) +pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i { + unsafe { transmute(vcvttps2dq(a)) } } /// Extracts 128 bits (composed of 4 packed single-precision (32-bit) @@ -944,13 +976,15 @@ pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { )] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { +pub fn _mm256_extractf128_ps(a: __m256) -> __m128 { static_assert_uimm_bits!(IMM1, 1); - simd_shuffle!( - a, - _mm256_undefined_ps(), - [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], - ) + 
unsafe { + simd_shuffle!( + a, + _mm256_undefined_ps(), + [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize], + ) + } } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) @@ -965,9 +999,9 @@ pub unsafe fn _mm256_extractf128_ps(a: __m256) -> __m128 { )] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d { +pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d { static_assert_uimm_bits!(IMM1, 1); - simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) + unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) } } /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. @@ -981,10 +1015,12 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> __m128d { )] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i { +pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i { static_assert_uimm_bits!(IMM1, 1); - let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],); - transmute(dst) + unsafe { + let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],); + transmute(dst) + } } /// Extracts a 32-bit integer from `a`, selected with `INDEX`. @@ -995,9 +1031,9 @@ pub unsafe fn _mm256_extractf128_si256(a: __m256i) -> __m128i { // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { +pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { static_assert_uimm_bits!(INDEX, 3); - simd_extract!(a.as_i32x8(), INDEX as u32) + unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) } } /// Returns the first element of the input vector of `[8 x i32]`. @@ -1006,8 +1042,8 @@ pub unsafe fn _mm256_extract_epi32(a: __m256i) -> i32 { #[inline] #[target_feature(enable = "avx")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { - simd_extract!(a.as_i32x8(), 0) +pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { + unsafe { simd_extract!(a.as_i32x8(), 0) } } /// Zeroes the contents of all XMM or YMM registers.
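The immediate-operand intrinsics above keep their const-generic parameter (`IMM1`, `INDEX`); only the outer `unsafe` goes away. A small illustrative helper (not from this diff, name made up) showing how the constant is supplied at a call site:

use std::arch::x86_64::{__m256i, _mm256_extract_epi32};

// `INDEX` is a const generic validated by `static_assert_uimm_bits!`;
// `#[rustc_legacy_const_generics(1)]` also keeps the older positional form
// `_mm256_extract_epi32(v, 2)` compiling.
#[target_feature(enable = "avx")]
fn third_lane(v: __m256i) -> i32 {
    _mm256_extract_epi32::<2>(v)
}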
@@ -1041,8 +1077,8 @@ pub unsafe fn _mm256_zeroupper() { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { - vpermilps256(a, b.as_i32x8()) +pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { + unsafe { vpermilps256(a, b.as_i32x8()) } } /// Shuffles single-precision (32-bit) floating-point elements in `a` @@ -1053,8 +1089,8 @@ pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { - vpermilps(a, b.as_i32x4()) +pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { + unsafe { vpermilps(a, b.as_i32x4()) } } /// Shuffles single-precision (32-bit) floating-point elements in `a` @@ -1066,22 +1102,24 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute_ps(a: __m256) -> __m256 { +pub fn _mm256_permute_ps(a: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); - simd_shuffle!( - a, - _mm256_undefined_ps(), - [ - (IMM8 as u32 >> 0) & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - ((IMM8 as u32 >> 0) & 0b11) + 4, - ((IMM8 as u32 >> 2) & 0b11) + 4, - ((IMM8 as u32 >> 4) & 0b11) + 4, - ((IMM8 as u32 >> 6) & 0b11) + 4, - ], - ) + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ps(), + [ + (IMM8 as u32 >> 0) & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ((IMM8 as u32 >> 0) & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + ], + ) + } } /// Shuffles single-precision (32-bit) floating-point elements in `a` @@ -1093,18 +1131,20 @@ pub unsafe fn _mm256_permute_ps(a: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_permute_ps(a: __m128) -> __m128 { +pub fn _mm_permute_ps(a: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); - simd_shuffle!( - a, - _mm_undefined_ps(), - [ - (IMM8 as u32 >> 0) & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - ], - ) + unsafe { + simd_shuffle!( + a, + _mm_undefined_ps(), + [ + (IMM8 as u32 >> 0) & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ) + } } /// Shuffles double-precision (64-bit) floating-point elements in `a` @@ -1115,8 +1155,8 @@ pub unsafe fn _mm_permute_ps(a: __m128) -> __m128 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { - vpermilpd256(a, b.as_i64x4()) +pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { + unsafe { vpermilpd256(a, b.as_i64x4()) } } /// Shuffles double-precision (64-bit) floating-point elements in `a` @@ -1127,8 +1167,8 @@ pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn 
_mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { - vpermilpd(a, b.as_i64x2()) +pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { + unsafe { vpermilpd(a, b.as_i64x2()) } } /// Shuffles double-precision (64-bit) floating-point elements in `a` @@ -1140,18 +1180,20 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { #[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d { +pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d { static_assert_uimm_bits!(IMM4, 4); - simd_shuffle!( - a, - _mm256_undefined_pd(), - [ - ((IMM4 as u32 >> 0) & 1), - ((IMM4 as u32 >> 1) & 1), - ((IMM4 as u32 >> 2) & 1) + 2, - ((IMM4 as u32 >> 3) & 1) + 2, - ], - ) + unsafe { + simd_shuffle!( + a, + _mm256_undefined_pd(), + [ + ((IMM4 as u32 >> 0) & 1), + ((IMM4 as u32 >> 1) & 1), + ((IMM4 as u32 >> 2) & 1) + 2, + ((IMM4 as u32 >> 3) & 1) + 2, + ], + ) + } } /// Shuffles double-precision (64-bit) floating-point elements in `a` @@ -1163,13 +1205,15 @@ pub unsafe fn _mm256_permute_pd(a: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d { +pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d { static_assert_uimm_bits!(IMM2, 2); - simd_shuffle!( - a, - _mm_undefined_pd(), - [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1], - ) + unsafe { + simd_shuffle!( + a, + _mm_undefined_pd(), + [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1], + ) + } } /// Shuffles 256 bits (composed of 8 packed single-precision (32-bit) @@ -1181,9 +1225,9 @@ pub unsafe fn _mm_permute_pd(a: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { +pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); - vperm2f128ps256(a, b, IMM8 as i8) + unsafe { vperm2f128ps256(a, b, IMM8 as i8) } } /// Shuffles 256 bits (composed of 4 packed double-precision (64-bit) @@ -1195,9 +1239,9 @@ pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256) -> _ #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d { +pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); - vperm2f128pd256(a, b, IMM8 as i8) + unsafe { vperm2f128pd256(a, b, IMM8 as i8) } } /// Shuffles 128-bits (composed of integer data) selected by `imm8` @@ -1209,9 +1253,9 @@ pub unsafe fn _mm256_permute2f128_pd(a: __m256d, b: __m256d) -> #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) + unsafe { transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) } } /// Broadcasts a single-precision (32-bit) floating-point element from memory @@ -1290,13 +1334,15 @@ pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { )]
#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 { +pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 { static_assert_uimm_bits!(IMM1, 1); - simd_shuffle!( - a, - _mm256_castps128_ps256(b), - [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], - ) + unsafe { + simd_shuffle!( + a, + _mm256_castps128_ps256(b), + [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize], + ) + } } /// Copies `a` to result, then inserts 128 bits (composed of 2 packed @@ -1312,13 +1358,15 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128) -> __m )] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d { +pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d { static_assert_uimm_bits!(IMM1, 1); - simd_shuffle!( - a, - _mm256_castpd128_pd256(b), - [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], - ) + unsafe { + simd_shuffle!( + a, + _mm256_castpd128_pd256(b), + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + ) + } } /// Copies `a` to result, then inserts 128 bits from `b` into result @@ -1333,14 +1381,16 @@ pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d) -> _ )] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i { +pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i { static_assert_uimm_bits!(IMM1, 1); - let dst: i64x4 = simd_shuffle!( - a.as_i64x4(), - _mm256_castsi128_si256(b).as_i64x4(), - [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], - ); - transmute(dst) + unsafe { + let dst: i64x4 = simd_shuffle!( + a.as_i64x4(), + _mm256_castsi128_si256(b).as_i64x4(), + [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize], + ); + transmute(dst) + } } /// Copies `a` to result, and inserts the 8-bit integer `i` into result @@ -1352,9 +1402,9 @@ pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i) - // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i { +pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i { static_assert_uimm_bits!(INDEX, 5); - transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) + unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) } } /// Copies `a` to result, and inserts the 16-bit integer `i` into result @@ -1366,9 +1416,9 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8) -> __m256i // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i { +pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i { static_assert_uimm_bits!(INDEX, 4); - transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) + unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) } } /// Copies `a` to result, and inserts the 32-bit integer `i` into result @@ -1380,9 +1430,9 @@ pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16) -> __m25 // This intrinsic has no corresponding instruction.
#[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m256i { +pub fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m256i { static_assert_uimm_bits!(INDEX, 3); - transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) + unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) } } /// Loads 256-bits (composed of 4 packed double-precision (64-bit) @@ -1673,8 +1723,8 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovshdup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { - simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) +pub fn _mm256_movehdup_ps(a: __m256) -> __m256 { + unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements @@ -1685,8 +1735,8 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovsldup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { - simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) +pub fn _mm256_moveldup_ps(a: __m256) -> __m256 { + unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements @@ -1697,8 +1747,8 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d { - simd_shuffle!(a, a, [0, 0, 2, 2]) +pub fn _mm256_movedup_pd(a: __m256d) -> __m256d { + unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) } } /// Loads 256-bits of integer data from unaligned memory into result. 
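Pointer-based intrinsics such as `_mm_maskstore_ps` and `_mm256_stream_ps` still appear with `unsafe fn` signatures in the surrounding context lines; only register-to-register operations like `_mm256_movehdup_ps` become safe. A sketch of what that split looks like for a caller (not part of the diff; the helper name is made up, and it assumes the unaligned loads likewise keep their `unsafe fn` signatures):

use std::arch::x86_64::{__m256, _mm256_loadu_ps, _mm256_movehdup_ps};

// The pointer-based load is still an `unsafe fn`, so it needs its own
// `unsafe` block even inside an AVX context, while the now-safe
// `_mm256_movehdup_ps` shuffle does not.
#[target_feature(enable = "avx")]
fn dup_odd_lanes(data: &[f32; 8]) -> __m256 {
    let v = unsafe { _mm256_loadu_ps(data.as_ptr()) };
    _mm256_movehdup_ps(v)
}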
@@ -1807,8 +1857,8 @@ pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrcpps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { - vrcpps(a) +pub fn _mm256_rcp_ps(a: __m256) -> __m256 { + unsafe { vrcpps(a) } } /// Computes the approximate reciprocal square root of packed single-precision @@ -1820,8 +1870,8 @@ pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrsqrtps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { - vrsqrtps(a) +pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 { + unsafe { vrsqrtps(a) } } /// Unpacks and interleave double-precision (64-bit) floating-point elements @@ -1832,8 +1882,8 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { - simd_shuffle!(a, b, [1, 5, 3, 7]) +pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) } } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1844,8 +1894,8 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { - simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) +pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) } } /// Unpacks and interleave double-precision (64-bit) floating-point elements @@ -1856,8 +1906,8 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpcklpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { - simd_shuffle!(a, b, [0, 4, 2, 6]) +pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) } } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1868,8 +1918,8 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vunpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { - simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) +pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { + unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) } } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1882,8 +1932,8 @@ pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { - ptestz256(a.as_i64x4(), b.as_i64x4()) +pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) } } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1896,8 +1946,8 @@ pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 { 
#[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { - ptestc256(a.as_i64x4(), b.as_i64x4()) +pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) } } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and @@ -1911,8 +1961,8 @@ pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { - ptestnzc256(a.as_i64x4(), b.as_i64x4()) +pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { + unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1928,8 +1978,8 @@ pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { - vtestzpd256(a, b) +pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestzpd256(a, b) } } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1945,8 +1995,8 @@ pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { - vtestcpd256(a, b) +pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestcpd256(a, b) } } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) @@ -1963,8 +2013,8 @@ pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { - vtestnzcpd256(a, b) +pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { + unsafe { vtestnzcpd256(a, b) } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -1980,8 +2030,8 @@ pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { - vtestzpd(a, b) +pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestzpd(a, b) } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -1997,8 +2047,8 @@ pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { - vtestcpd(a, b) +pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestcpd(a, b) } } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) @@ -2015,8 +2065,8 @@ pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { - vtestnzcpd(a, b) +pub fn 
_mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { + unsafe { vtestnzcpd(a, b) } } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) @@ -2032,8 +2082,8 @@ pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { - vtestzps256(a, b) +pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestzps256(a, b) } } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) @@ -2049,8 +2099,8 @@ pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { - vtestcps256(a, b) +pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestcps256(a, b) } } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) @@ -2067,8 +2117,8 @@ pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { - vtestnzcps256(a, b) +pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { + unsafe { vtestnzcps256(a, b) } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2084,8 +2134,8 @@ pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { - vtestzps(a, b) +pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestzps(a, b) } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2101,8 +2151,8 @@ pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { - vtestcps(a, b) +pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestcps(a, b) } } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) @@ -2119,8 +2169,8 @@ pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { - vtestnzcps(a, b) +pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { + unsafe { vtestnzcps(a, b) } } /// Sets each bit of the returned mask based on the most significant bit of the @@ -2132,11 +2182,13 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { +pub fn _mm256_movemask_pd(a: __m256d) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. 
- let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO); - simd_bitmask::(mask).into() + unsafe { + let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO); + simd_bitmask::(mask).into() + } } /// Sets each bit of the returned mask based on the most significant bit of the @@ -2148,11 +2200,13 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { +pub fn _mm256_movemask_ps(a: __m256) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. - let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO); - simd_bitmask::(mask).into() + unsafe { + let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO); + simd_bitmask::(mask).into() + } } /// Returns vector of type __m256d with all elements set to zero. @@ -2162,8 +2216,8 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorp))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setzero_pd() -> __m256d { - const { mem::zeroed() } +pub fn _mm256_setzero_pd() -> __m256d { + const { unsafe { mem::zeroed() } } } /// Returns vector of type __m256 with all elements set to zero. @@ -2173,8 +2227,8 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setzero_ps() -> __m256 { - const { mem::zeroed() } +pub fn _mm256_setzero_ps() -> __m256 { + const { unsafe { mem::zeroed() } } } /// Returns vector of type __m256i with all elements set to zero. @@ -2184,8 +2238,8 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxor))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setzero_si256() -> __m256i { - const { mem::zeroed() } +pub fn _mm256_setzero_si256() -> __m256i { + const { unsafe { mem::zeroed() } } } /// Sets packed double-precision (64-bit) floating-point elements in returned @@ -2197,7 +2251,7 @@ pub unsafe fn _mm256_setzero_si256() -> __m256i { // This intrinsic has no corresponding instruction. #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { +pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { _mm256_setr_pd(d, c, b, a) } @@ -2209,16 +2263,7 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_ps( - a: f32, - b: f32, - c: f32, - d: f32, - e: f32, - f: f32, - g: f32, - h: f32, -) -> __m256 { +pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 { _mm256_setr_ps(h, g, f, e, d, c, b, a) } @@ -2229,7 +2274,7 @@ pub unsafe fn _mm256_set_ps( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_epi8( +pub fn _mm256_set_epi8( e00: i8, e01: i8, e02: i8, @@ -2279,7 +2324,7 @@ pub unsafe fn _mm256_set_epi8( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_epi16( +pub fn _mm256_set_epi16( e00: i16, e01: i16, e02: i16, @@ -2313,7 +2358,7 @@ pub unsafe fn _mm256_set_epi16( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_epi32( +pub fn _mm256_set_epi32( e0: i32, e1: i32, e2: i32, @@ -2333,7 +2378,7 @@ pub unsafe fn _mm256_set_epi32( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { +pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { _mm256_setr_epi64x(d, c, b, a) } @@ -2345,7 +2390,7 @@ pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { +pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { __m256d([a, b, c, d]) } @@ -2357,16 +2402,7 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_ps( - a: f32, - b: f32, - c: f32, - d: f32, - e: f32, - f: f32, - g: f32, - h: f32, -) -> __m256 { +pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 { __m256([a, b, c, d, e, f, g, h]) } @@ -2378,7 +2414,7 @@ pub unsafe fn _mm256_setr_ps( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_epi8( +pub fn _mm256_setr_epi8( e00: i8, e01: i8, e02: i8, @@ -2412,13 +2448,15 @@ pub unsafe fn _mm256_setr_epi8( e30: i8, e31: i8, ) -> __m256i { - #[rustfmt::skip] - transmute(i8x32::new( - e00, e01, e02, e03, e04, e05, e06, e07, - e08, e09, e10, e11, e12, e13, e14, e15, - e16, e17, e18, e19, e20, e21, e22, e23, - e24, e25, e26, e27, e28, e29, e30, e31, - )) + unsafe { + #[rustfmt::skip] + transmute(i8x32::new( + e00, e01, e02, e03, e04, e05, e06, e07, + e08, e09, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31, + )) + } } /// Sets packed 16-bit integers in returned vector with the supplied values in @@ -2429,7 +2467,7 @@ pub unsafe fn _mm256_setr_epi8( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_epi16( +pub fn _mm256_setr_epi16( e00: i16, e01: i16, e02: i16, @@ -2447,13 +2485,15 @@ pub unsafe fn _mm256_setr_epi16( e14: i16, e15: i16, ) -> __m256i { - #[rustfmt::skip] - transmute(i16x16::new( - e00, e01, e02, e03, - e04, e05, e06, e07, - e08, e09, e10, e11, - e12, e13, e14, e15, - )) + unsafe { + #[rustfmt::skip] + transmute(i16x16::new( + e00, e01, e02, e03, + e04, e05, e06, e07, + e08, e09, e10, e11, + e12, e13, e14, e15, + )) + } } /// Sets packed 32-bit integers in returned vector with the supplied values in @@ -2464,7 +2504,7 @@ pub unsafe fn _mm256_setr_epi16( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_epi32( +pub fn _mm256_setr_epi32( e0: i32, e1: i32, e2: i32, @@ -2474,7 +2514,7 @@ pub unsafe fn _mm256_setr_epi32( e6: i32, e7: i32, ) -> __m256i { - transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) + unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } } /// Sets packed 64-bit integers in returned vector with the supplied values in @@ -2485,8 +2525,8 @@ pub unsafe fn _mm256_setr_epi32( #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { - transmute(i64x4::new(a, b, c, d)) +pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { + unsafe { transmute(i64x4::new(a, b, c, d)) } } /// Broadcasts double-precision (64-bit) floating-point value `a` to all @@ -2497,7 +2537,7 @@ pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d { +pub fn _mm256_set1_pd(a: f64) -> __m256d { _mm256_setr_pd(a, a, a, a) } @@ -2509,7 +2549,7 @@ pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 { +pub fn _mm256_set1_ps(a: f32) -> __m256 { _mm256_setr_ps(a, a, a, a, a, a, a, a) } @@ -2521,7 +2561,7 @@ pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { +pub fn _mm256_set1_epi8(a: i8) -> __m256i { #[rustfmt::skip] _mm256_setr_epi8( a, a, a, a, a, a, a, a, @@ -2541,7 +2581,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { #[cfg_attr(test, assert_instr(vinsertf128))] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i { +pub fn _mm256_set1_epi16(a: i16) -> __m256i { _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } @@ -2553,7 +2593,7 @@ pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { +pub fn _mm256_set1_epi32(a: i32) -> __m256i { _mm256_setr_epi32(a, a, a, a, a, a, a, a) } @@ -2567,7 +2607,7 @@ pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { #[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i { +pub fn _mm256_set1_epi64x(a: i64) -> __m256i { _mm256_setr_epi64x(a, a, a, a) } @@ -2579,8 +2619,8 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 { - transmute(a) +pub fn _mm256_castpd_ps(a: __m256d) -> __m256 { + unsafe { transmute(a) } } /// Cast vector of type __m256 to type __m256d. @@ -2591,8 +2631,8 @@ pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d { - transmute(a) +pub fn _mm256_castps_pd(a: __m256) -> __m256d { + unsafe { transmute(a) } } /// Casts vector of type __m256 to type __m256i. @@ -2603,8 +2643,8 @@ pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i { - transmute(a) +pub fn _mm256_castps_si256(a: __m256) -> __m256i { + unsafe { transmute(a) } } /// Casts vector of type __m256i to type __m256. @@ -2615,8 +2655,8 @@ pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 { - transmute(a) +pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 { + unsafe { transmute(a) } } /// Casts vector of type __m256d to type __m256i. @@ -2627,8 +2667,8 @@ pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i { - transmute(a) +pub fn _mm256_castpd_si256(a: __m256d) -> __m256i { + unsafe { transmute(a) } } /// Casts vector of type __m256i to type __m256d. @@ -2639,8 +2679,8 @@ pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d { - transmute(a) +pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d { + unsafe { transmute(a) } } /// Casts vector of type __m256 to type __m128. @@ -2651,8 +2691,8 @@ pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { - simd_shuffle!(a, a, [0, 1, 2, 3]) +pub fn _mm256_castps256_ps128(a: __m256) -> __m128 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } /// Casts vector of type __m256d to type __m128d. @@ -2663,8 +2703,8 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { - simd_shuffle!(a, a, [0, 1]) +pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { + unsafe { simd_shuffle!(a, a, [0, 1]) } } /// Casts vector of type __m256i to type __m128i. 
@@ -2675,10 +2715,12 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { - let a = a.as_i64x4(); - let dst: i64x2 = simd_shuffle!(a, a, [0, 1]); - transmute(dst) +pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i { + unsafe { + let a = a.as_i64x4(); + let dst: i64x2 = simd_shuffle!(a, a, [0, 1]); + transmute(dst) + } } /// Casts vector of type __m128 to type __m256; @@ -2690,8 +2732,8 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { - simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) +pub fn _mm256_castps128_ps256(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) } } /// Casts vector of type __m128d to type __m256d; @@ -2703,8 +2745,8 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { - simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) +pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { + unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) } } /// Casts vector of type __m128i to type __m256i; @@ -2716,11 +2758,13 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { - let a = a.as_i64x2(); - let undefined = i64x2::ZERO; - let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]); - transmute(dst) +pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i64x2(); + let undefined = i64x2::ZERO; + let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]); + transmute(dst) + } } /// Constructs a 256-bit floating-point vector of `[8 x float]` from a @@ -2733,8 +2777,8 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { - simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) +pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) } } /// Constructs a 256-bit integer vector from a 128-bit integer vector. @@ -2747,10 +2791,12 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. 
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { - let b = i64x2::ZERO; - let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]); - transmute(dst) +pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { + unsafe { + let b = i64x2::ZERO; + let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]); + transmute(dst) + } } /// Constructs a 256-bit floating-point vector of `[4 x double]` from a @@ -2764,8 +2810,8 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { - simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) +pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { + unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) } } /// Returns vector of type `__m256` with indeterminate elements. @@ -2777,8 +2823,8 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_undefined_ps() -> __m256 { - const { mem::zeroed() } +pub fn _mm256_undefined_ps() -> __m256 { + const { unsafe { mem::zeroed() } } } /// Returns vector of type `__m256d` with indeterminate elements. @@ -2790,8 +2836,8 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_undefined_pd() -> __m256d { - const { mem::zeroed() } +pub fn _mm256_undefined_pd() -> __m256d { + const { unsafe { mem::zeroed() } } } /// Returns vector of type __m256i with with indeterminate elements. @@ -2803,8 +2849,8 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d { #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_undefined_si256() -> __m256i { - const { mem::zeroed() } +pub fn _mm256_undefined_si256() -> __m256i { + const { unsafe { mem::zeroed() } } } /// Sets packed __m256 returned vector with the supplied values. @@ -2814,8 +2860,8 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { - simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) +pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { + unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } } /// Sets packed __m256d returned vector with the supplied values. @@ -2825,10 +2871,12 @@ pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { - let hi: __m128 = transmute(hi); - let lo: __m128 = transmute(lo); - transmute(_mm256_set_m128(hi, lo)) +pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { + unsafe { + let hi: __m128 = transmute(hi); + let lo: __m128 = transmute(lo); + transmute(_mm256_set_m128(hi, lo)) + } } /// Sets packed __m256i returned vector with the supplied values. 
@@ -2838,10 +2886,12 @@ pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { - let hi: __m128 = transmute(hi); - let lo: __m128 = transmute(lo); - transmute(_mm256_set_m128(hi, lo)) +pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { + unsafe { + let hi: __m128 = transmute(hi); + let lo: __m128 = transmute(lo); + transmute(_mm256_set_m128(hi, lo)) + } } /// Sets packed __m256 returned vector with the supplied values. @@ -2851,7 +2901,7 @@ pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { +pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { _mm256_set_m128(hi, lo) } @@ -2862,7 +2912,7 @@ pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { +pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { _mm256_set_m128d(hi, lo) } @@ -2873,7 +2923,7 @@ pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { +pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { _mm256_set_m128i(hi, lo) } @@ -2978,8 +3028,8 @@ pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(movss))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 { - simd_extract!(a, 0) +pub fn _mm256_cvtss_f32(a: __m256) -> f32 { + unsafe { simd_extract!(a, 0) } } // LLVM intrinsics used in the above functions diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 10c1f2de8d..20c61449a7 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -33,10 +33,12 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i { - let a = a.as_i32x8(); - let r = simd_select::(simd_lt(a, i32x8::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm256_abs_epi32(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i32x8(); + let r = simd_select::(simd_lt(a, i32x8::ZERO), simd_neg(a), a); + transmute(r) + } } /// Computes the absolute values of packed 16-bit integers in `a`. 
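A small usage sketch for the `setr_m128` family and `_mm256_cvtss_f32` (illustrative only; `first_lane` is an invented name): the `setr` variants take their halves in memory order, the plain `set` variants take them high-first.

use std::arch::x86_64::*;

#[target_feature(enable = "avx")]
fn first_lane(lo: __m128, hi: __m128) -> f32 {
    // `_mm256_setr_m128` takes (lo, hi); `_mm256_set_m128` takes (hi, lo).
    let v = _mm256_setr_m128(lo, hi);
    _mm256_cvtss_f32(v) // lowest 32-bit element, i.e. element 0 of `lo`
}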
@@ -46,10 +48,12 @@ pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i { - let a = a.as_i16x16(); - let r = simd_select::(simd_lt(a, i16x16::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm256_abs_epi16(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i16x16(); + let r = simd_select::(simd_lt(a, i16x16::ZERO), simd_neg(a), a); + transmute(r) + } } /// Computes the absolute values of packed 8-bit integers in `a`. @@ -59,10 +63,12 @@ pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { - let a = a.as_i8x32(); - let r = simd_select::(simd_lt(a, i8x32::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm256_abs_epi8(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i8x32(); + let r = simd_select::(simd_lt(a, i8x32::ZERO), simd_neg(a), a); + transmute(r) + } } /// Adds packed 64-bit integers in `a` and `b`. @@ -72,8 +78,8 @@ pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_add(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) } } /// Adds packed 32-bit integers in `a` and `b`. @@ -83,8 +89,8 @@ pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_add(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) } } /// Adds packed 16-bit integers in `a` and `b`. @@ -94,8 +100,8 @@ pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_add(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) } } /// Adds packed 8-bit integers in `a` and `b`. @@ -105,8 +111,8 @@ pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_add(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) } } /// Adds packed 8-bit integers in `a` and `b` using saturation. 
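The AVX2 hunks follow the same shape. A minimal sketch of how the now-safe absolute-value and add intrinsics compose in an AVX2-enabled caller (`abs_then_add` is a name invented for illustration):

use std::arch::x86_64::*;

#[target_feature(enable = "avx2")]
fn abs_then_add(a: __m256i, b: __m256i) -> __m256i {
    // |a| per 32-bit lane, then a wrapping lane-wise add, matching the hardware semantics.
    _mm256_add_epi32(_mm256_abs_epi32(a), b)
}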
@@ -116,8 +122,8 @@ pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) } } /// Adds packed 16-bit integers in `a` and `b` using saturation. @@ -127,8 +133,8 @@ pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) } } /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. @@ -138,8 +144,8 @@ pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddusb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) } } /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. @@ -149,8 +155,8 @@ pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddusw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) } } /// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary @@ -162,160 +168,162 @@ pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - // If palignr is shifting the pair of vectors more than the size of two - // lanes, emit zero. - if IMM8 >= 32 { - return _mm256_setzero_si256(); - } - // If palignr is shifting the pair of input vectors more than one lane, - // but less than two lanes, convert to shifting in zeroes. - let (a, b) = if IMM8 > 16 { - (_mm256_setzero_si256(), a) - } else { - (a, b) - }; + unsafe { + // If palignr is shifting the pair of vectors more than the size of two + // lanes, emit zero. + if IMM8 >= 32 { + return _mm256_setzero_si256(); + } + // If palignr is shifting the pair of input vectors more than one lane, + // but less than two lanes, convert to shifting in zeroes. 
+ let (a, b) = if IMM8 > 16 { + (_mm256_setzero_si256(), a) + } else { + (a, b) + }; - let a = a.as_i8x32(); - let b = b.as_i8x32(); + let a = a.as_i8x32(); + let b = b.as_i8x32(); - if IMM8 == 16 { - return transmute(a); - } + if IMM8 == 16 { + return transmute(a); + } - let r: i8x32 = match IMM8 % 16 { - 0 => simd_shuffle!( - b, - a, - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, - ], - ), - 1 => simd_shuffle!( - b, - a, - [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 48, - ], - ), - 2 => simd_shuffle!( - b, - a, - [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 48, 49, - ], - ), - 3 => simd_shuffle!( - b, - a, - [ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, - ], - ), - 4 => simd_shuffle!( - b, - a, - [ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, - ], - ), - 5 => simd_shuffle!( - b, - a, - [ - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, - ], - ), - 6 => simd_shuffle!( - b, - a, - [ - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, - ], - ), - 7 => simd_shuffle!( - b, - a, - [ - 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, - ], - ), - 8 => simd_shuffle!( - b, - a, - [ - 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28, - 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, - ], - ), - 9 => simd_shuffle!( - b, - a, - [ - 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29, - 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, - ], - ), - 10 => simd_shuffle!( - b, - a, - [ - 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30, - 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - ], - ), - 11 => simd_shuffle!( - b, - a, - [ - 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - ], - ), - 12 => simd_shuffle!( - b, - a, - [ - 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48, - 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - ], - ), - 13 => simd_shuffle!( - b, - a, - [ - 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, - ], - ), - 14 => simd_shuffle!( - b, - a, - [ - 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - ], - ), - 15 => simd_shuffle!( - b, - a, - [ - 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, - ], - ), - _ => unreachable_unchecked(), - }; - transmute(r) + let r: i8x32 = match IMM8 % 16 { + 0 => simd_shuffle!( + b, + a, + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ], + ), + 1 => simd_shuffle!( + b, + a, + [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, + ], + ), + 2 => simd_shuffle!( + 
b, + a, + [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, + ], + ), + 3 => simd_shuffle!( + b, + a, + [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, + ], + ), + 4 => simd_shuffle!( + b, + a, + [ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, + ], + ), + 5 => simd_shuffle!( + b, + a, + [ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, + ], + ), + 6 => simd_shuffle!( + b, + a, + [ + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, + ], + ), + 7 => simd_shuffle!( + b, + a, + [ + 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, + ], + ), + 8 => simd_shuffle!( + b, + a, + [ + 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, + 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, + ], + ), + 9 => simd_shuffle!( + b, + a, + [ + 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, + 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, + ], + ), + 10 => simd_shuffle!( + b, + a, + [ + 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, + 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + ], + ), + 11 => simd_shuffle!( + b, + a, + [ + 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, + 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + ], + ), + 12 => simd_shuffle!( + b, + a, + [ + 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + ], + ), + 13 => simd_shuffle!( + b, + a, + [ + 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + ], + ), + 14 => simd_shuffle!( + b, + a, + [ + 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + ], + ), + 15 => simd_shuffle!( + b, + a, + [ + 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + ], + ), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Computes the bitwise AND of 256 bits (representing integer data) @@ -326,8 +334,8 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_and(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) } } /// Computes the bitwise NOT of 256 bits (representing integer data) @@ -338,12 +346,14 @@ pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { - let all_ones = _mm256_set1_epi8(-1); - transmute(simd_and( - simd_xor(a.as_i64x4(), all_ones.as_i64x4()), - b.as_i64x4(), - )) +pub fn _mm256_andnot_si256(a: __m256i, b: 
__m256i) -> __m256i { + unsafe { + let all_ones = _mm256_set1_epi8(-1); + transmute(simd_and( + simd_xor(a.as_i64x4(), all_ones.as_i64x4()), + b.as_i64x4(), + )) + } } /// Averages packed unsigned 16-bit integers in `a` and `b`. @@ -353,11 +363,13 @@ pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpavgw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { - let a = simd_cast::<_, u32x16>(a.as_u16x16()); - let b = simd_cast::<_, u32x16>(b.as_u16x16()); - let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1)); - transmute(simd_cast::<_, u16x16>(r)) +pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, u32x16>(a.as_u16x16()); + let b = simd_cast::<_, u32x16>(b.as_u16x16()); + let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1)); + transmute(simd_cast::<_, u16x16>(r)) + } } /// Averages packed unsigned 8-bit integers in `a` and `b`. @@ -367,11 +379,13 @@ pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpavgb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { - let a = simd_cast::<_, u16x32>(a.as_u8x32()); - let b = simd_cast::<_, u16x32>(b.as_u8x32()); - let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1)); - transmute(simd_cast::<_, u8x32>(r)) +pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, u16x32>(a.as_u8x32()); + let b = simd_cast::<_, u16x32>(b.as_u8x32()); + let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1)); + transmute(simd_cast::<_, u8x32>(r)) + } } /// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`. @@ -382,21 +396,23 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM4, 4); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let r: i32x4 = simd_shuffle!( - a, - b, - [ - [0, 4, 0, 4][IMM4 as usize & 0b11], - [1, 1, 5, 5][IMM4 as usize & 0b11], - [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11], - [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11], - ], - ); - transmute(r) + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let r: i32x4 = simd_shuffle!( + a, + b, + [ + [0, 4, 0, 4][IMM4 as usize & 0b11], + [1, 1, 5, 5][IMM4 as usize & 0b11], + [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11], + [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11], + ], + ); + transmute(r) + } } /// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`. 
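For the `alignr` / `avg` / `andnot` group above, a brief sketch of how they chain once safe; note that the const shift count is still supplied through the const generic parameter, as before. `shift_avg_clear` and the particular shift of 4 bytes are illustrative choices, not anything prescribed by the patch.

use std::arch::x86_64::*;

#[target_feature(enable = "avx2")]
fn shift_avg_clear(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    // Per 128-bit lane: concatenate a (high) with b (low) and shift right by 4 bytes (vpalignr).
    let shifted = _mm256_alignr_epi8::<4>(a, b);
    // Unsigned byte average, (x + y + 1) >> 1 per lane.
    let avg = _mm256_avg_epu8(shifted, b);
    // Bitwise !mask & avg: keep only the bits that are clear in `mask`.
    _mm256_andnot_si256(mask, avg)
}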
@@ -407,25 +423,27 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i) -> __m128 #[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let r: i32x8 = simd_shuffle!( - a, - b, - [ - [0, 8, 0, 8][IMM8 as usize & 0b11], - [1, 1, 9, 9][IMM8 as usize & 0b11], - [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11], - [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11], - [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11], - [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11], - [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11], - [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11], - ], - ); - transmute(r) + unsafe { + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r: i32x8 = simd_shuffle!( + a, + b, + [ + [0, 8, 0, 8][IMM8 as usize & 0b11], + [1, 1, 9, 9][IMM8 as usize & 0b11], + [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11], + [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11], + [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11], + [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11], + [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11], + [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11], + ], + ); + transmute(r) + } } /// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`. @@ -436,34 +454,36 @@ pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i) -> __m #[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x16(); - let b = b.as_i16x16(); - - let r: i16x16 = simd_shuffle!( - a, - b, - [ - [0, 16, 0, 16][IMM8 as usize & 0b11], - [1, 1, 17, 17][IMM8 as usize & 0b11], - [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11], - [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11], - [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11], - [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11], - [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11], - [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11], - [8, 24, 8, 24][IMM8 as usize & 0b11], - [9, 9, 25, 25][IMM8 as usize & 0b11], - [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11], - [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11], - [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11], - [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11], - [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11], - [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11], - ], - ); - transmute(r) + unsafe { + let a = a.as_i16x16(); + let b = b.as_i16x16(); + + let r: i16x16 = simd_shuffle!( + a, + b, + [ + [0, 16, 0, 16][IMM8 as usize & 0b11], + [1, 1, 17, 17][IMM8 as usize & 0b11], + [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11], + [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11], + [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11], + [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11], + [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11], + [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11], + [8, 24, 8, 24][IMM8 as usize & 0b11], + [9, 9, 25, 25][IMM8 as usize & 0b11], + [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11], + [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11], + [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11], + [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11], + [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11], + [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11], + ], + ); + 
transmute(r) + } } /// Blends packed 8-bit integers from `a` and `b` using `mask`. @@ -473,9 +493,11 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { - let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO); - transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32())) +pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { + unsafe { + let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO); + transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32())) + } } /// Broadcasts the low packed 8-bit integer from `a` to all elements of @@ -486,9 +508,11 @@ pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m25 #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { - let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]); - transmute::(ret) +pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]); + transmute::(ret) + } } /// Broadcasts the low packed 8-bit integer from `a` to all elements of @@ -499,9 +523,11 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { - let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]); - transmute::(ret) +pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]); + transmute::(ret) + } } // N.B., `simd_shuffle4` with integer data types for `a` and `b` is @@ -514,9 +540,11 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { - let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]); - transmute::(ret) +pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]); + transmute::(ret) + } } // N.B., `simd_shuffle4`` with integer data types for `a` and `b` is @@ -529,9 +557,11 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { - let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]); - transmute::(ret) +pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]); + transmute::(ret) + } } /// Broadcasts the low packed 64-bit integer from `a` to all elements of @@ -544,9 +574,11 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { // See https://github.com/rust-lang/stdarch/issues/791 #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { - let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); - transmute::(ret) +pub fn 
_mm_broadcastq_epi64(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); + transmute::(ret) + } } /// Broadcasts the low packed 64-bit integer from `a` to all elements of @@ -557,9 +589,11 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { - let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); - transmute::(ret) +pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); + transmute::(ret) + } } /// Broadcasts the low double-precision (64-bit) floating-point element @@ -570,8 +604,8 @@ pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { - simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) +pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) } } /// Broadcasts the low double-precision (64-bit) floating-point element @@ -582,8 +616,8 @@ pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { - simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) +pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { + unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) } } /// Broadcasts 128 bits of integer data from a to all 128-bit lanes in @@ -593,9 +627,11 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { #[inline] #[target_feature(enable = "avx2")] #[stable(feature = "simd_x86_updates", since = "1.82.0")] -pub unsafe fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i { - let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); - transmute::(ret) +pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); + transmute::(ret) + } } // N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or @@ -607,9 +643,11 @@ pub unsafe fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i { #[inline] #[target_feature(enable = "avx2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { - let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); - transmute::(ret) +pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); + transmute::(ret) + } } /// Broadcasts the low single-precision (32-bit) floating-point element @@ -620,8 +658,8 @@ pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { - simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) +pub fn _mm_broadcastss_ps(a: __m128) -> __m128 { + unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) } } /// Broadcasts the low single-precision (32-bit) floating-point element @@ -632,8 +670,8 @@ pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { 
#[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { - simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) +pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) } } /// Broadcasts the low packed 16-bit integer from a to all elements of @@ -644,9 +682,11 @@ pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { - let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]); - transmute::(ret) +pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { + unsafe { + let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]); + transmute::(ret) + } } /// Broadcasts the low packed 16-bit integer from a to all elements of @@ -657,9 +697,11 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { - let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]); - transmute::(ret) +pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { + unsafe { + let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]); + transmute::(ret) + } } /// Compares packed 64-bit integers in `a` and `b` for equality. @@ -669,8 +711,8 @@ pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) } } /// Compares packed 32-bit integers in `a` and `b` for equality. @@ -680,8 +722,8 @@ pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) } } /// Compares packed 16-bit integers in `a` and `b` for equality. @@ -691,8 +733,8 @@ pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) } } /// Compares packed 8-bit integers in `a` and `b` for equality. 
@@ -702,8 +744,8 @@ pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) } } /// Compares packed 64-bit integers in `a` and `b` for greater-than. @@ -713,8 +755,8 @@ pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) } } /// Compares packed 32-bit integers in `a` and `b` for greater-than. @@ -724,8 +766,8 @@ pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) } } /// Compares packed 16-bit integers in `a` and `b` for greater-than. @@ -735,8 +777,8 @@ pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) } } /// Compares packed 8-bit integers in `a` and `b` for greater-than. @@ -746,8 +788,8 @@ pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute::(simd_gt(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute::(simd_gt(a.as_i8x32(), b.as_i8x32())) } } /// Sign-extend 16-bit integers to 32-bit integers. @@ -757,8 +799,8 @@ pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { - transmute::(simd_cast(a.as_i16x8())) +pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_i16x8())) } } /// Sign-extend 16-bit integers to 64-bit integers. 
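One more sketch showing the calling convention this change enables end to end: the body of a `#[target_feature(enable = "avx2")]` function needs no `unsafe`, and a caller that does not statically enable AVX2 supplies a single `unsafe` block backed by runtime detection. This assumes the semantics of safe `#[target_feature]` functions on a recent toolchain, and it uses `_mm_set1_epi32` purely to build a test input; `count_positive` is an invented name.

use std::arch::x86_64::*;

#[target_feature(enable = "avx2")]
fn count_positive(v: __m128i) -> u32 {
    let wide = _mm256_cvtepi16_epi32(v);                       // sign-extend 8 x i16 -> i32
    let gt = _mm256_cmpgt_epi32(wide, _mm256_setzero_si256()); // all-ones where > 0
    _mm256_movemask_ps(_mm256_castsi256_ps(gt)).count_ones()   // one mask bit per lane
}

fn main() {
    if std::arch::is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 support was just verified at runtime; only the call itself
        // is unsafe, the body of `count_positive` no longer needs a block.
        let n = unsafe { count_positive(_mm_set1_epi32(0x0001_0001)) };
        assert_eq!(n, 8); // every 16-bit lane holds 1, so all eight are positive
    }
}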
@@ -768,10 +810,12 @@ pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { - let a = a.as_i16x8(); - let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute::(simd_cast(v64)) +pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i16x8(); + let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v64)) + } } /// Sign-extend 32-bit integers to 64-bit integers. @@ -781,8 +825,8 @@ pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { - transmute::(simd_cast(a.as_i32x4())) +pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_i32x4())) } } /// Sign-extend 8-bit integers to 16-bit integers. @@ -792,8 +836,8 @@ pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { - transmute::(simd_cast(a.as_i8x16())) +pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_i8x16())) } } /// Sign-extend 8-bit integers to 32-bit integers. @@ -803,10 +847,12 @@ pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { - let a = a.as_i8x16(); - let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute::(simd_cast(v64)) +pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i8x16(); + let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v64)) + } } /// Sign-extend 8-bit integers to 64-bit integers. 
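Note: a pointer-free sketch of the 32-to-64-bit widening, assuming the set/extract helpers receive the same safe-signature treatment in this series; `widen_i32_to_i64` is an illustrative name, not part of the patch:

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
fn widen_i32_to_i64(a: i32, b: i32, c: i32, d: i32) -> (i64, i64) {
    // _mm_set_epi32 takes lanes high-to-low, so `d` ends up in lane 0.
    let v = _mm_set_epi32(a, b, c, d);
    let wide = _mm256_cvtepi32_epi64(v);
    // Lane 0 holds `d` sign-extended, lane 3 holds `a`.
    (_mm256_extract_epi64::<0>(wide), _mm256_extract_epi64::<3>(wide))
}
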
@@ -816,10 +862,12 @@ pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { - let a = a.as_i8x16(); - let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute::(simd_cast(v32)) +pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i8x16(); + let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v32)) + } } /// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit @@ -830,8 +878,8 @@ pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { - transmute::(simd_cast(a.as_u16x8())) +pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_u16x8())) } } /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit @@ -842,10 +890,12 @@ pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { - let a = a.as_u16x8(); - let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute::(simd_cast(v64)) +pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_u16x8(); + let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v64)) + } } /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers. @@ -855,8 +905,8 @@ pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { - transmute::(simd_cast(a.as_u32x4())) +pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_u32x4())) } } /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers. 
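Note: the zero-extending (`cvtepu*`) and sign-extending (`cvtepi*`) variants differ only in how the upper bits are filled. A minimal sketch contrasting the two on an all-ones input; the helper name is illustrative and assumes safe `#[target_feature]` calls:

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
fn zero_extend_vs_sign_extend() -> (i32, i32) {
    let v = _mm_set1_epi16(-1); // every 16-bit lane is 0xFFFF
    let zext = _mm256_cvtepu16_epi32(v); // zero-extend: each lane becomes 65535
    let sext = _mm256_cvtepi16_epi32(v); // sign-extend: each lane becomes -1
    (_mm256_extract_epi32::<0>(zext), _mm256_extract_epi32::<0>(sext))
}
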
@@ -866,8 +916,8 @@ pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { - transmute::(simd_cast(a.as_u8x16())) +pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { + unsafe { transmute::(simd_cast(a.as_u8x16())) } } /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit @@ -878,10 +928,12 @@ pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { - let a = a.as_u8x16(); - let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute::(simd_cast(v64)) +pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { + unsafe { + let a = a.as_u8x16(); + let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v64)) + } } /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit @@ -892,10 +944,12 @@ pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { - let a = a.as_u8x16(); - let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute::(simd_cast(v32)) +pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { + unsafe { + let a = a.as_u8x16(); + let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute::(simd_cast(v32)) + } } /// Extracts 128 bits (of integer data) from `a` selected with `IMM1`. @@ -909,12 +963,14 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { )] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extracti128_si256(a: __m256i) -> __m128i { +pub fn _mm256_extracti128_si256(a: __m256i) -> __m128i { static_assert_uimm_bits!(IMM1, 1); - let a = a.as_i64x4(); - let b = i64x4::ZERO; - let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); - transmute(dst) + unsafe { + let a = a.as_i64x4(); + let b = i64x4::ZERO; + let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); + transmute(dst) + } } /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`. @@ -924,8 +980,8 @@ pub unsafe fn _mm256_extracti128_si256(a: __m256i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(phaddw(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) } } /// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`. 
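Note: the const-generic lane index of `_mm256_extracti128_si256` is validated at compile time by `static_assert_uimm_bits!`, so the extraction needs no runtime check and, after this change, no `unsafe`. Illustrative sketch (helper name is not part of the patch):

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
fn split_halves(a: __m256i) -> (__m128i, __m128i) {
    // IMM1 selects the 128-bit lane; values other than 0 or 1 are rejected at compile time.
    (
        _mm256_extracti128_si256::<0>(a),
        _mm256_extracti128_si256::<1>(a),
    )
}
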
@@ -935,8 +991,8 @@ pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(phaddd(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) } } /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` @@ -947,8 +1003,8 @@ pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) } } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`. @@ -958,8 +1014,8 @@ pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(phsubw(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) } } /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`. @@ -969,8 +1025,8 @@ pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(phsubd(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) } } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b` @@ -981,8 +1037,8 @@ pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) } } /// Returns values from `slice` at offsets determined by `offsets * scale`, @@ -1731,12 +1787,14 @@ pub unsafe fn _mm256_mask_i64gather_pd( )] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i) -> __m256i { +pub fn _mm256_inserti128_si256(a: __m256i, b: __m128i) -> __m256i { static_assert_uimm_bits!(IMM1, 1); - let a = a.as_i64x4(); - let b = _mm256_castsi128_si256(b).as_i64x4(); - let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); - transmute(dst) + unsafe { + let a = a.as_i64x4(); + let b = _mm256_castsi128_si256(b).as_i64x4(); + let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]); + transmute(dst) + } } /// Multiplies packed signed 16-bit integers in `a` and `b`, producing @@ -1748,8 +1806,8 @@ pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i) - 
#[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaddwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) } } /// Vertically multiplies each unsigned 8-bit integer from `a` with the @@ -1762,8 +1820,8 @@ pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaddubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) } } /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` @@ -1878,10 +1936,12 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25 #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i16x16(); - let b = b.as_i16x16(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i16x16(); + let b = b.as_i16x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 32-bit integers in `a` and `b`, and returns the packed @@ -1892,10 +1952,12 @@ pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i32x8(); - let b = b.as_i32x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i32x8(); + let b = b.as_i32x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 8-bit integers in `a` and `b`, and returns the packed @@ -1906,10 +1968,12 @@ pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i8x32(); - let b = b.as_i8x32(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i8x32(); + let b = b.as_i8x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns @@ -1920,10 +1984,12 @@ pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u16x16(); - let b = b.as_u16x16(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u16x16(); + let b = b.as_u16x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns @@ 
-1934,10 +2000,12 @@ pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u32x8(); - let b = b.as_u32x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u32x8(); + let b = b.as_u32x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns @@ -1948,10 +2016,12 @@ pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u8x32(); - let b = b.as_u8x32(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u8x32(); + let b = b.as_u8x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed @@ -1962,10 +2032,12 @@ pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i16x16(); - let b = b.as_i16x16(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i16x16(); + let b = b.as_i16x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed 32-bit integers in `a` and `b`, and returns the packed @@ -1976,10 +2048,12 @@ pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i32x8(); - let b = b.as_i32x8(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i32x8(); + let b = b.as_i32x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed 8-bit integers in `a` and `b`, and returns the packed @@ -1990,10 +2064,12 @@ pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i8x32(); - let b = b.as_i8x32(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i8x32(); + let b = b.as_i8x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns @@ -2004,10 +2080,12 @@ pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u16x16(); - let b = b.as_u16x16(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn 
_mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u16x16(); + let b = b.as_u16x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns @@ -2018,10 +2096,12 @@ pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminud))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u32x8(); - let b = b.as_u32x8(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u32x8(); + let b = b.as_u32x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns @@ -2032,10 +2112,12 @@ pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u8x32(); - let b = b.as_u8x32(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u8x32(); + let b = b.as_u8x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Creates mask from the most significant bit of each 8-bit element in `a`, @@ -2046,10 +2128,12 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { - let z = i8x32::ZERO; - let m: i8x32 = simd_lt(a.as_i8x32(), z); - simd_bitmask::<_, u32>(m) as i32 +pub fn _mm256_movemask_epi8(a: __m256i) -> i32 { + unsafe { + let z = i8x32::ZERO; + let m: i8x32 = simd_lt(a.as_i8x32(), z); + simd_bitmask::<_, u32>(m) as i32 + } } /// Computes the sum of absolute differences (SADs) of quadruplets of unsigned @@ -2066,9 +2150,9 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { #[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) + unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) } } /// Multiplies the low 32-bit integers from each packed 64-bit element in @@ -2081,10 +2165,12 @@ pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmuldq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { - let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4())); - let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4())); - transmute(simd_mul(a, b)) +pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4())); + let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4())); + transmute(simd_mul(a, b)) + } } /// Multiplies the low unsigned 32-bit integers from each packed 64-bit @@ -2097,11 +2183,13 @@ pub unsafe fn _mm256_mul_epi32(a: __m256i, b: 
__m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmuludq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u64x4(); - let b = b.as_u64x4(); - let mask = u64x4::splat(u32::MAX.into()); - transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) +pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u64x4(); + let b = b.as_u64x4(); + let mask = u64x4::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) + } } /// Multiplies the packed 16-bit integers in `a` and `b`, producing @@ -2113,11 +2201,13 @@ pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { - let a = simd_cast::<_, i32x16>(a.as_i16x16()); - let b = simd_cast::<_, i32x16>(b.as_i16x16()); - let r = simd_shr(simd_mul(a, b), i32x16::splat(16)); - transmute(simd_cast::(r)) +pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, i32x16>(a.as_i16x16()); + let b = simd_cast::<_, i32x16>(b.as_i16x16()); + let r = simd_shr(simd_mul(a, b), i32x16::splat(16)); + transmute(simd_cast::(r)) + } } /// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing @@ -2129,11 +2219,13 @@ pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { - let a = simd_cast::<_, u32x16>(a.as_u16x16()); - let b = simd_cast::<_, u32x16>(b.as_u16x16()); - let r = simd_shr(simd_mul(a, b), u32x16::splat(16)); - transmute(simd_cast::(r)) +pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = simd_cast::<_, u32x16>(a.as_u16x16()); + let b = simd_cast::<_, u32x16>(b.as_u16x16()); + let r = simd_shr(simd_mul(a, b), u32x16::splat(16)); + transmute(simd_cast::(r)) + } } /// Multiplies the packed 16-bit integers in `a` and `b`, producing @@ -2145,8 +2237,8 @@ pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmullw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) } } /// Multiplies the packed 32-bit integers in `a` and `b`, producing @@ -2158,8 +2250,8 @@ pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) } } /// Multiplies packed 16-bit integers in `a` and `b`, producing @@ -2172,8 +2264,8 @@ pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhrsw))] #[stable(feature = "simd_x86", 
since = "1.27.0")] -pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) } } /// Computes the bitwise OR of 256 bits (representing integer data) in `a` @@ -2184,8 +2276,8 @@ pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_or(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers @@ -2196,8 +2288,8 @@ pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpacksswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(packsswb(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -2208,8 +2300,8 @@ pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackssdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(packssdw(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers @@ -2220,8 +2312,8 @@ pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackuswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(packuswb(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -2232,8 +2324,8 @@ pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackusdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(packusdw(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) } } /// Permutes packed 32-bit integers from `a` according to the content of `b`. 
@@ -2246,8 +2338,8 @@ pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(permd(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) } } /// Permutes 64-bit integers from `a` using control mask `imm8`. @@ -2258,20 +2350,22 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { +pub fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let zero = i64x4::ZERO; - let r: i64x4 = simd_shuffle!( - a.as_i64x4(), - zero, - [ - IMM8 as u32 & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - ], - ); - transmute(r) + unsafe { + let zero = i64x4::ZERO; + let r: i64x4 = simd_shuffle!( + a.as_i64x4(), + zero, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ); + transmute(r) + } } /// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`. @@ -2282,9 +2376,9 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) + unsafe { transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8)) } } /// Shuffles 64-bit floating-point elements in `a` across lanes using the @@ -2296,18 +2390,20 @@ pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i) #[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permute4x64_pd(a: __m256d) -> __m256d { +pub fn _mm256_permute4x64_pd(a: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); - simd_shuffle!( - a, - _mm256_undefined_pd(), - [ - IMM8 as u32 & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - ], - ) + unsafe { + simd_shuffle!( + a, + _mm256_undefined_pd(), + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ) + } } /// Shuffles eight 32-bit floating-point elements in `a` across lanes using @@ -2318,8 +2414,8 @@ pub unsafe fn _mm256_permute4x64_pd(a: __m256d) -> __m256d { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { - permps(a, idx.as_i32x8()) +pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { + unsafe { permps(a, idx.as_i32x8()) } } /// Computes the absolute differences of packed unsigned 8-bit integers in `a` @@ -2332,8 +2428,8 @@ pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { #[target_feature(enable = "avx2")] #[cfg_attr(test, 
assert_instr(vpsadbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { - transmute(psadbw(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) } } /// Shuffles bytes from `a` according to the content of `b`. @@ -2370,8 +2466,8 @@ pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshufb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(pshufb(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) } } /// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in @@ -2409,23 +2505,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vshufps, MASK = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { +pub fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { static_assert_uimm_bits!(MASK, 8); - let r: i32x8 = simd_shuffle!( - a.as_i32x8(), - a.as_i32x8(), - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - (MASK as u32 >> 4) & 0b11, - (MASK as u32 >> 6) & 0b11, - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 4, - ((MASK as u32 >> 6) & 0b11) + 4, - ], - ); - transmute(r) + unsafe { + let r: i32x8 = simd_shuffle!( + a.as_i32x8(), + a.as_i32x8(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ); + transmute(r) + } } /// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using @@ -2438,32 +2536,34 @@ pub unsafe fn _mm256_shuffle_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { +pub fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x16(); - let r: i16x16 = simd_shuffle!( - a, - a, - [ - 0, - 1, - 2, - 3, - 4 + (IMM8 as u32 & 0b11), - 4 + ((IMM8 as u32 >> 2) & 0b11), - 4 + ((IMM8 as u32 >> 4) & 0b11), - 4 + ((IMM8 as u32 >> 6) & 0b11), - 8, - 9, - 10, - 11, - 12 + (IMM8 as u32 & 0b11), - 12 + ((IMM8 as u32 >> 2) & 0b11), - 12 + ((IMM8 as u32 >> 4) & 0b11), - 12 + ((IMM8 as u32 >> 6) & 0b11), - ], - ); - transmute(r) + unsafe { + let a = a.as_i16x16(); + let r: i16x16 = simd_shuffle!( + a, + a, + [ + 0, + 1, + 2, + 3, + 4 + (IMM8 as u32 & 0b11), + 4 + ((IMM8 as u32 >> 2) & 0b11), + 4 + ((IMM8 as u32 >> 4) & 0b11), + 4 + ((IMM8 as u32 >> 6) & 0b11), + 8, + 9, + 10, + 11, + 12 + (IMM8 as u32 & 0b11), + 12 + ((IMM8 as u32 >> 2) & 0b11), + 12 + ((IMM8 as u32 >> 4) & 0b11), + 12 + ((IMM8 as u32 >> 6) & 0b11), + ], + ); + transmute(r) + } } /// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using @@ -2476,32 +2576,34 @@ pub unsafe fn _mm256_shufflehi_epi16(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = 
"1.27.0")] -pub unsafe fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i { +pub fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x16(); - let r: i16x16 = simd_shuffle!( - a, - a, - [ - 0 + (IMM8 as u32 & 0b11), - 0 + ((IMM8 as u32 >> 2) & 0b11), - 0 + ((IMM8 as u32 >> 4) & 0b11), - 0 + ((IMM8 as u32 >> 6) & 0b11), - 4, - 5, - 6, - 7, - 8 + (IMM8 as u32 & 0b11), - 8 + ((IMM8 as u32 >> 2) & 0b11), - 8 + ((IMM8 as u32 >> 4) & 0b11), - 8 + ((IMM8 as u32 >> 6) & 0b11), - 12, - 13, - 14, - 15, - ], - ); - transmute(r) + unsafe { + let a = a.as_i16x16(); + let r: i16x16 = simd_shuffle!( + a, + a, + [ + 0 + (IMM8 as u32 & 0b11), + 0 + ((IMM8 as u32 >> 2) & 0b11), + 0 + ((IMM8 as u32 >> 4) & 0b11), + 0 + ((IMM8 as u32 >> 6) & 0b11), + 4, + 5, + 6, + 7, + 8 + (IMM8 as u32 & 0b11), + 8 + ((IMM8 as u32 >> 2) & 0b11), + 8 + ((IMM8 as u32 >> 4) & 0b11), + 8 + ((IMM8 as u32 >> 6) & 0b11), + 12, + 13, + 14, + 15, + ], + ); + transmute(r) + } } /// Negates packed 16-bit integers in `a` when the corresponding signed @@ -2513,8 +2615,8 @@ pub unsafe fn _mm256_shufflelo_epi16(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(psignw(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) } } /// Negates packed 32-bit integers in `a` when the corresponding signed @@ -2526,8 +2628,8 @@ pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(psignd(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) } } /// Negates packed 8-bit integers in `a` when the corresponding signed @@ -2539,8 +2641,8 @@ pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(psignb(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) } } /// Shifts packed 16-bit integers in `a` left by `count` while @@ -2551,8 +2653,8 @@ pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { - transmute(psllw(a.as_i16x16(), count.as_i16x8())) +pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) } } /// Shifts packed 32-bit integers in `a` left by `count` while @@ -2563,8 +2665,8 @@ pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { - transmute(pslld(a.as_i32x8(), count.as_i32x4())) +pub fn _mm256_sll_epi32(a: 
__m256i, count: __m128i) -> __m256i { + unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) } } /// Shifts packed 64-bit integers in `a` left by `count` while @@ -2575,8 +2677,8 @@ pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { - transmute(psllq(a.as_i64x4(), count.as_i64x2())) +pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) } } /// Shifts packed 16-bit integers in `a` left by `IMM8` while @@ -2588,12 +2690,14 @@ pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_slli_epi16(a: __m256i) -> __m256i { +pub fn _mm256_slli_epi16(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm256_setzero_si256() - } else { - transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))) + unsafe { + if IMM8 >= 16 { + _mm256_setzero_si256() + } else { + transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))) + } } } @@ -2606,12 +2710,14 @@ pub unsafe fn _mm256_slli_epi16(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_slli_epi32(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm256_setzero_si256() - } else { - transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32))) +pub fn _mm256_slli_epi32(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32))) + } } } @@ -2624,12 +2730,14 @@ pub unsafe fn _mm256_slli_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_slli_epi64(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm256_setzero_si256() - } else { - transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))) +pub fn _mm256_slli_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))) + } } } @@ -2641,7 +2749,7 @@ pub unsafe fn _mm256_slli_epi64(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_slli_si256(a: __m256i) -> __m256i { +pub fn _mm256_slli_si256(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_bslli_epi128::(a) } @@ -2654,7 +2762,7 @@ pub unsafe fn _mm256_slli_si256(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_bslli_epi128(a: __m256i) -> __m256i { +pub fn _mm256_bslli_epi128(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); const fn mask(shift: i32, i: u32) -> u32 { let shift = shift as u32 & 0xff; @@ -2664,46 +2772,48 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i) -> __m256i { 32 + (i - 
shift) } } - let a = a.as_i8x32(); - let r: i8x32 = simd_shuffle!( - i8x32::ZERO, - a, - [ - mask(IMM8, 0), - mask(IMM8, 1), - mask(IMM8, 2), - mask(IMM8, 3), - mask(IMM8, 4), - mask(IMM8, 5), - mask(IMM8, 6), - mask(IMM8, 7), - mask(IMM8, 8), - mask(IMM8, 9), - mask(IMM8, 10), - mask(IMM8, 11), - mask(IMM8, 12), - mask(IMM8, 13), - mask(IMM8, 14), - mask(IMM8, 15), - mask(IMM8, 16), - mask(IMM8, 17), - mask(IMM8, 18), - mask(IMM8, 19), - mask(IMM8, 20), - mask(IMM8, 21), - mask(IMM8, 22), - mask(IMM8, 23), - mask(IMM8, 24), - mask(IMM8, 25), - mask(IMM8, 26), - mask(IMM8, 27), - mask(IMM8, 28), - mask(IMM8, 29), - mask(IMM8, 30), - mask(IMM8, 31), - ], - ); - transmute(r) + unsafe { + let a = a.as_i8x32(); + let r: i8x32 = simd_shuffle!( + i8x32::ZERO, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + ], + ); + transmute(r) + } } /// Shifts packed 32-bit integers in `a` left by the amount @@ -2715,8 +2825,8 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { - transmute(psllvd(a.as_i32x4(), count.as_i32x4())) +pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` left by the amount @@ -2728,8 +2838,8 @@ pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { - transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) +pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } } /// Shifts packed 64-bit integers in `a` left by the amount @@ -2741,8 +2851,8 @@ pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { - transmute(psllvq(a.as_i64x2(), count.as_i64x2())) +pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } } /// Shifts packed 64-bit integers in `a` left by the amount @@ -2754,8 +2864,8 @@ pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { - transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) +pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } } /// Shifts packed 16-bit integers in `a` right 
by `count` while @@ -2766,8 +2876,8 @@ pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { - transmute(psraw(a.as_i16x16(), count.as_i16x8())) +pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) } } /// Shifts packed 32-bit integers in `a` right by `count` while @@ -2778,8 +2888,8 @@ pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { - transmute(psrad(a.as_i32x8(), count.as_i32x4())) +pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while @@ -2791,9 +2901,9 @@ pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srai_epi16(a: __m256i) -> __m256i { +pub fn _mm256_srai_epi16(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) + unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) } } /// Shifts packed 32-bit integers in `a` right by `IMM8` while @@ -2805,9 +2915,9 @@ pub unsafe fn _mm256_srai_epi16(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srai_epi32(a: __m256i) -> __m256i { +pub fn _mm256_srai_epi32(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) + unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by the @@ -2818,8 +2928,8 @@ pub unsafe fn _mm256_srai_epi32(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { - transmute(psravd(a.as_i32x4(), count.as_i32x4())) +pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by the @@ -2830,8 +2940,8 @@ pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { - transmute(psravd256(a.as_i32x8(), count.as_i32x8())) +pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } } /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. 
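Note: `srai` is the arithmetic right shift, and the `IMM8.min(..)` clamp above means oversized immediates saturate the shift count instead of being undefined. A minimal sketch of the classic per-lane sign-mask idiom, assuming safe `#[target_feature]` calls; the helper name is illustrative:

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
fn per_lane_sign_mask(a: __m256i) -> __m256i {
    // Shifting right arithmetically by 31 replicates each lane's sign bit:
    // non-negative lanes become 0, negative lanes become -1.
    _mm256_srai_epi32::<31>(a)
}
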
@@ -2842,7 +2952,7 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srli_si256(a: __m256i) -> __m256i { +pub fn _mm256_srli_si256(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_bsrli_epi128::(a) } @@ -2855,142 +2965,144 @@ pub unsafe fn _mm256_srli_si256(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { +pub fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i8x32(); - let zero = i8x32::ZERO; - let r: i8x32 = match IMM8 % 16 { - 0 => simd_shuffle!( - a, - zero, - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, - ], - ), - 1 => simd_shuffle!( - a, - zero, - [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, - ], - ), - 2 => simd_shuffle!( - a, - zero, - [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 32, - ], - ), - 3 => simd_shuffle!( - a, - zero, - [ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, - ], - ), - 4 => simd_shuffle!( - a, - zero, - [ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, - ], - ), - 5 => simd_shuffle!( - a, - zero, - [ - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, - ], - ), - 6 => simd_shuffle!( - a, - zero, - [ - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, - ], - ), - 7 => simd_shuffle!( - a, - zero, - [ - 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 8 => simd_shuffle!( - a, - zero, - [ - 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 9 => simd_shuffle!( - a, - zero, - [ - 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28, 29, - 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 10 => simd_shuffle!( - a, - zero, - [ - 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29, 30, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 11 => simd_shuffle!( - a, - zero, - [ - 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 12 => simd_shuffle!( - a, - zero, - [ - 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 13 => simd_shuffle!( - a, - zero, - [ - 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 14 => simd_shuffle!( - a, - zero, - [ - 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 15 => simd_shuffle!( - a, - zero, - 
[ - 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - _ => zero, - }; - transmute(r) + unsafe { + let a = a.as_i8x32(); + let zero = i8x32::ZERO; + let r: i8x32 = match IMM8 % 16 { + 0 => simd_shuffle!( + a, + zero, + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ], + ), + 1 => simd_shuffle!( + a, + zero, + [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ], + ), + 2 => simd_shuffle!( + a, + zero, + [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, + ], + ), + 3 => simd_shuffle!( + a, + zero, + [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, + ], + ), + 4 => simd_shuffle!( + a, + zero, + [ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, + ], + ), + 5 => simd_shuffle!( + a, + zero, + [ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, + ], + ), + 6 => simd_shuffle!( + a, + zero, + [ + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, + ], + ), + 7 => simd_shuffle!( + a, + zero, + [ + 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 8 => simd_shuffle!( + a, + zero, + [ + 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 9 => simd_shuffle!( + a, + zero, + [ + 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28, + 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 10 => simd_shuffle!( + a, + zero, + [ + 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29, + 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 11 => simd_shuffle!( + a, + zero, + [ + 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30, + 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 12 => simd_shuffle!( + a, + zero, + [ + 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 13 => simd_shuffle!( + a, + zero, + [ + 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 14 => simd_shuffle!( + a, + zero, + [ + 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + 15 => simd_shuffle!( + a, + zero, + [ + 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + ], + ), + _ => zero, + }; + transmute(r) + } } /// Shifts packed 16-bit integers in `a` right by `count` while shifting in @@ -3001,8 +3113,8 @@ pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { - transmute(psrlw(a.as_i16x16(), 
count.as_i16x8())) +pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in @@ -3013,8 +3125,8 @@ pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { - transmute(psrld(a.as_i32x8(), count.as_i32x4())) +pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) } } /// Shifts packed 64-bit integers in `a` right by `count` while shifting in @@ -3025,8 +3137,8 @@ pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { - transmute(psrlq(a.as_i64x4(), count.as_i64x2())) +pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in @@ -3038,12 +3150,14 @@ pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srli_epi16(a: __m256i) -> __m256i { +pub fn _mm256_srli_epi16(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm256_setzero_si256() - } else { - transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16))) + unsafe { + if IMM8 >= 16 { + _mm256_setzero_si256() + } else { + transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16))) + } } } @@ -3056,12 +3170,14 @@ pub unsafe fn _mm256_srli_epi16(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srli_epi32(a: __m256i) -> __m256i { +pub fn _mm256_srli_epi32(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm256_setzero_si256() - } else { - transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32))) + unsafe { + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32))) + } } } @@ -3074,12 +3190,14 @@ pub unsafe fn _mm256_srli_epi32(a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srli_epi64(a: __m256i) -> __m256i { +pub fn _mm256_srli_epi64(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm256_setzero_si256() - } else { - transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))) + unsafe { + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))) + } } } @@ -3091,8 +3209,8 @@ pub unsafe fn _mm256_srli_epi64(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { - transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) +pub fn _mm_srlv_epi32(a: __m128i, count: 
__m128i) -> __m128i { + unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 32-bit integers in `a` right by the amount specified by @@ -3103,8 +3221,8 @@ pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { - transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) +pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } } /// Shifts packed 64-bit integers in `a` right by the amount specified by @@ -3115,8 +3233,8 @@ pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { - transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) +pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } } /// Shifts packed 64-bit integers in `a` right by the amount specified by @@ -3127,8 +3245,8 @@ pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { - transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) +pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) } } /// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. 
mem_addr @@ -3158,8 +3276,8 @@ pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) } } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a` @@ -3169,8 +3287,8 @@ pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) } } /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a` @@ -3180,8 +3298,8 @@ pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) } } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` @@ -3191,8 +3309,8 @@ pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) } } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in @@ -3203,8 +3321,8 @@ pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) } } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in @@ -3215,8 +3333,8 @@ pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) } } /// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit @@ -3227,8 +3345,8 @@ pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubusw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> 
__m256i { - transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) } } /// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit @@ -3239,8 +3357,8 @@ pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubusb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) } } /// Unpacks and interleave 8-bit integers from the high half of each @@ -3286,15 +3404,17 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { - #[rustfmt::skip] - let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ - 8, 40, 9, 41, 10, 42, 11, 43, - 12, 44, 13, 45, 14, 46, 15, 47, - 24, 56, 25, 57, 26, 58, 27, 59, - 28, 60, 29, 61, 30, 62, 31, 63, - ]); - transmute(r) +pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + #[rustfmt::skip] + let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63, + ]); + transmute(r) + } } /// Unpacks and interleave 8-bit integers from the low half of each @@ -3339,15 +3459,17 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpcklbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { - #[rustfmt::skip] - let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ - 0, 32, 1, 33, 2, 34, 3, 35, - 4, 36, 5, 37, 6, 38, 7, 39, - 16, 48, 17, 49, 18, 50, 19, 51, - 20, 52, 21, 53, 22, 54, 23, 55, - ]); - transmute(r) +pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + #[rustfmt::skip] + let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [ + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55, + ]); + transmute(r) + } } /// Unpacks and interleave 16-bit integers from the high half of each @@ -3388,13 +3510,15 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { - let r: i16x16 = simd_shuffle!( - a.as_i16x16(), - b.as_i16x16(), - [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31], - ); - transmute(r) +pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i16x16 = simd_shuffle!( + a.as_i16x16(), + b.as_i16x16(), + [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31], + ); + transmute(r) + } } /// Unpacks and interleave 16-bit integers from the low half of each @@ -3436,13 +3560,15 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] 
#[cfg_attr(test, assert_instr(vpunpcklwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { - let r: i16x16 = simd_shuffle!( - a.as_i16x16(), - b.as_i16x16(), - [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27], - ); - transmute(r) +pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i16x16 = simd_shuffle!( + a.as_i16x16(), + b.as_i16x16(), + [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27], + ); + transmute(r) + } } /// Unpacks and interleave 32-bit integers from the high half of each @@ -3477,9 +3603,11 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { - let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); - transmute(r) +pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); + transmute(r) + } } /// Unpacks and interleave 32-bit integers from the low half of each @@ -3514,9 +3642,11 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { - let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); - transmute(r) +pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); + transmute(r) + } } /// Unpacks and interleave 64-bit integers from the high half of each @@ -3551,9 +3681,11 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { - let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); - transmute(r) +pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); + transmute(r) + } } /// Unpacks and interleave 64-bit integers from the low half of each @@ -3588,9 +3720,11 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpcklpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { - let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); - transmute(r) +pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); + transmute(r) + } } /// Computes the bitwise XOR of 256 bits (representing integer data) @@ -3601,8 +3735,8 @@ pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) +pub fn 
_mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } } /// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit @@ -3616,9 +3750,9 @@ pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { +pub fn _mm256_extract_epi8(a: __m256i) -> i32 { static_assert_uimm_bits!(INDEX, 5); - simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 + unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 } } /// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit @@ -3632,9 +3766,9 @@ pub unsafe fn _mm256_extract_epi8(a: __m256i) -> i32 { // This intrinsic has no corresponding instruction. #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi16(a: __m256i) -> i32 { +pub fn _mm256_extract_epi16(a: __m256i) -> i32 { static_assert_uimm_bits!(INDEX, 4); - simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 + unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512bf16.rs b/crates/core_arch/src/x86/avx512bf16.rs index 6789fb1c31..ca45761d08 100644 --- a/crates/core_arch/src/x86/avx512bf16.rs +++ b/crates/core_arch/src/x86/avx512bf16.rs @@ -37,8 +37,8 @@ unsafe extern "C" { #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh { - transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4())) +pub fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh { + unsafe { transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4())) } } /// Convert packed single-precision (32-bit) floating-point elements in two vectors @@ -50,9 +50,11 @@ pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh { #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh { - let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, cvt, src.as_u16x8())) +pub fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh { + unsafe { + let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x8())) + } } /// Convert packed single-precision (32-bit) floating-point elements in two vectors @@ -64,9 +66,11 @@ pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __ #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh { - let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) +pub fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh { + unsafe { + let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in two 256-bit vectors @@ -77,8 +81,8 @@ pub unsafe fn 
_mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m12 #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh { - transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8())) +pub fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh { + unsafe { transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8())) } } /// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b @@ -89,14 +93,11 @@ pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh { #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm256_mask_cvtne2ps_pbh( - src: __m256bh, - k: __mmask16, - a: __m256, - b: __m256, -) -> __m256bh { - let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, cvt, src.as_u16x16())) +pub fn _mm256_mask_cvtne2ps_pbh(src: __m256bh, k: __mmask16, a: __m256, b: __m256) -> __m256bh { + unsafe { + let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x16())) + } } /// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b @@ -107,9 +108,11 @@ pub unsafe fn _mm256_mask_cvtne2ps_pbh( #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh { - let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) +pub fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh { + unsafe { + let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in two 512-bit vectors @@ -120,8 +123,8 @@ pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> _ #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh { - transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16())) +pub fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh { + unsafe { transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16())) } } /// Convert packed single-precision (32-bit) floating-point elements in two vectors @@ -133,14 +136,11 @@ pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh { #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm512_mask_cvtne2ps_pbh( - src: __m512bh, - k: __mmask32, - a: __m512, - b: __m512, -) -> __m512bh { - let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, cvt, src.as_u16x32())) +pub fn _mm512_mask_cvtne2ps_pbh(src: __m512bh, k: __mmask32, a: __m512, b: __m512) -> __m512bh { + unsafe { + let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x32())) + } } /// Convert packed single-precision (32-bit) floating-point elements in two vectors @@ -152,9 +152,11 @@ pub unsafe fn 
_mm512_mask_cvtne2ps_pbh( #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] -pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh { - let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, cvt, u16x32::ZERO)) +pub fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh { + unsafe { + let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, cvt, u16x32::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -164,8 +166,8 @@ pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> _ #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] -pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh { - transmute(cvtneps2bf16_256(a.as_f32x8())) +pub fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh { + unsafe { transmute(cvtneps2bf16_256(a.as_f32x8())) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -176,9 +178,11 @@ pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh { #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] -pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh { - let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); - transmute(simd_select_bitmask(k, cvt, src.as_u16x8())) +pub fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh { + unsafe { + let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x8())) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -189,9 +193,11 @@ pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] -pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh { - let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); - transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) +pub fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh { + unsafe { + let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); + transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -201,8 +207,8 @@ pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh { #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] -pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh { - transmute(cvtneps2bf16_512(a.as_f32x16())) +pub fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh { + unsafe { transmute(cvtneps2bf16_512(a.as_f32x16())) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -213,9 +219,11 @@ pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh { #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] -pub unsafe fn 
_mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh { - let cvt = _mm512_cvtneps_pbh(a).as_u16x16(); - transmute(simd_select_bitmask(k, cvt, src.as_u16x16())) +pub fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh { + unsafe { + let cvt = _mm512_cvtneps_pbh(a).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, src.as_u16x16())) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -226,9 +234,11 @@ pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] -pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh { - let cvt = _mm512_cvtneps_pbh(a).as_u16x16(); - transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) +pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh { + unsafe { + let cvt = _mm512_cvtneps_pbh(a).as_u16x16(); + transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) + } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -239,8 +249,8 @@ pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh { #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 { - transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 { + unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -252,9 +262,11 @@ pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 { #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 { - let rst = _mm_dpbf16_ps(src, a, b).as_f32x4(); - transmute(simd_select_bitmask(k, rst, src.as_f32x4())) +pub fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 { + unsafe { + let rst = _mm_dpbf16_ps(src, a, b).as_f32x4(); + transmute(simd_select_bitmask(k, rst, src.as_f32x4())) + } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -266,10 +278,12 @@ pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m12 #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 { - let rst = _mm_dpbf16_ps(src, a, b).as_f32x4(); - let zero = _mm_set1_ps(0.0_f32).as_f32x4(); - transmute(simd_select_bitmask(k, rst, zero)) +pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 { + unsafe { + let rst = _mm_dpbf16_ps(src, a, b).as_f32x4(); + let zero = _mm_set1_ps(0.0_f32).as_f32x4(); + transmute(simd_select_bitmask(k, rst, zero)) + } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -280,8 +294,8 @@ pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m1 #[target_feature(enable = "avx512bf16,avx512vl")] 
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 { - transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 { + unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -293,9 +307,11 @@ pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 { - let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); - transmute(simd_select_bitmask(k, rst, src.as_f32x8())) +pub fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 { + unsafe { + let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); + transmute(simd_select_bitmask(k, rst, src.as_f32x8())) + } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -307,9 +323,11 @@ pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __ #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 { - let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); - transmute(simd_select_bitmask(k, rst, f32x8::ZERO)) +pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 { + unsafe { + let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); + transmute(simd_select_bitmask(k, rst, f32x8::ZERO)) + } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -322,8 +340,8 @@ pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: _ #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 { - transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 { + unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -335,9 +353,11 @@ pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 { - let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); - transmute(simd_select_bitmask(k, rst, src.as_f32x16())) +pub fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 { + unsafe { + let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); + transmute(simd_select_bitmask(k, rst, src.as_f32x16())) + } } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -349,14 +369,11 @@ pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: _ 
#[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr("vdpbf16ps"))] -pub unsafe fn _mm512_maskz_dpbf16_ps( - k: __mmask16, - src: __m512, - a: __m512bh, - b: __m512bh, -) -> __m512 { - let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); - transmute(simd_select_bitmask(k, rst, f32x16::ZERO)) +pub fn _mm512_maskz_dpbf16_ps(k: __mmask16, src: __m512, a: __m512bh, b: __m512bh) -> __m512 { + unsafe { + let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); + transmute(simd_select_bitmask(k, rst, f32x16::ZERO)) + } } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -366,8 +383,8 @@ pub unsafe fn _mm512_maskz_dpbf16_ps( #[inline] #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 { - _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a)))) +pub fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 { + unsafe { _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a)))) } } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -378,9 +395,11 @@ pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 { #[inline] #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 { - let cvt = _mm512_cvtpbh_ps(a); - transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16())) +pub fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 { + unsafe { + let cvt = _mm512_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16())) + } } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -391,9 +410,11 @@ pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> _ #[inline] #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 { - let cvt = _mm512_cvtpbh_ps(a); - transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO)) +pub fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 { + unsafe { + let cvt = _mm512_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO)) + } } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -403,8 +424,8 @@ pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 { #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 { - _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a)))) +pub fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 { + unsafe { _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a)))) } } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -415,9 +436,11 @@ pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 { #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 { - let cvt = 
_mm256_cvtpbh_ps(a); - transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8())) +pub fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 { + unsafe { + let cvt = _mm256_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8())) + } } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -428,9 +451,11 @@ pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __ #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 { - let cvt = _mm256_cvtpbh_ps(a); - transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 { + unsafe { + let cvt = _mm256_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO)) + } } /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point @@ -440,8 +465,8 @@ pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 { #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 { - _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a)))) +pub fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 { + unsafe { _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a)))) } } /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point @@ -452,9 +477,11 @@ pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 { #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 { - let cvt = _mm_cvtpbh_ps(a); - transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4())) +pub fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 { + unsafe { + let cvt = _mm_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4())) + } } /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point @@ -465,9 +492,11 @@ pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m12 #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 { - let cvt = _mm_cvtpbh_ps(a); - transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 { + unsafe { + let cvt = _mm_cvtpbh_ps(a); + transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO)) + } } /// Converts a single BF16 (16-bit) floating-point element in a to a single-precision (32-bit) floating-point @@ -477,7 +506,7 @@ pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 { #[inline] #[target_feature(enable = "avx512bf16,avx512f")] #[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] -pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 { +pub fn _mm_cvtsbh_ss(a: bf16) -> f32 { f32::from_bits((a.to_bits() as u32) << 16) } @@ -489,15 +518,17 @@ pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 { #[target_feature(enable = "avx512bf16,avx512vl")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] 
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh { - let mut dst: __m128bh; - asm!( - "vcvtneps2bf16 {dst}, {src}", - dst = lateout(xmm_reg) dst, - src = in(xmm_reg) a, - options(pure, nomem, nostack, preserves_flags) - ); - dst +pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "vcvtneps2bf16 {dst}, {src}", + dst = lateout(xmm_reg) dst, + src = in(xmm_reg) a, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } } /// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -509,16 +540,18 @@ pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh { #[target_feature(enable = "avx512bf16,avx512vl")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh { - let mut dst = src; - asm!( - "vcvtneps2bf16 {dst}{{{k}}},{src}", - dst = inlateout(xmm_reg) dst, - src = in(xmm_reg) a, - k = in(kreg) k, - options(pure, nomem, nostack, preserves_flags) - ); - dst +pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh { + unsafe { + let mut dst = src; + asm!( + "vcvtneps2bf16 {dst}{{{k}}},{src}", + dst = inlateout(xmm_reg) dst, + src = in(xmm_reg) a, + k = in(kreg) k, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } } /// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -530,16 +563,18 @@ pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m #[target_feature(enable = "avx512bf16,avx512vl")] #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh { - let mut dst: __m128bh; - asm!( - "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}", - dst = lateout(xmm_reg) dst, - src = in(xmm_reg) a, - k = in(kreg) k, - options(pure, nomem, nostack, preserves_flags) - ); - dst +pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}", + dst = lateout(xmm_reg) dst, + src = in(xmm_reg) a, + k = in(kreg) k, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } } /// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point @@ -549,9 +584,11 @@ pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh { #[inline] #[target_feature(enable = "avx512bf16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] -pub unsafe fn _mm_cvtness_sbh(a: f32) -> bf16 { - let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0); - bf16::from_bits(value) +pub fn _mm_cvtness_sbh(a: f32) -> bf16 { + unsafe { + let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0); + bf16::from_bits(value) + } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/avx512bitalg.rs b/crates/core_arch/src/x86/avx512bitalg.rs index 5640ef8bf4..e27b737870 100644 --- a/crates/core_arch/src/x86/avx512bitalg.rs +++ b/crates/core_arch/src/x86/avx512bitalg.rs @@ -43,8 +43,8 @@ unsafe extern "C" { #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i { - transmute(simd_ctpop(a.as_i16x32())) +pub fn 
_mm512_popcnt_epi16(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i16x32())) } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -57,12 +57,14 @@ pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i { #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i16x32()), - i16x32::ZERO, - )) +pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x32()), + i16x32::ZERO, + )) + } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -75,12 +77,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i16x32()), - src.as_i16x32(), - )) +pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x32()), + src.as_i16x32(), + )) + } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -90,8 +94,8 @@ pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) - #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i { - transmute(simd_ctpop(a.as_i16x16())) +pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i16x16())) } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -104,12 +108,14 @@ pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i16x16()), - i16x16::ZERO, - )) +pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x16()), + i16x16::ZERO, + )) + } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -122,12 +128,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i16x16()), - src.as_i16x16(), - )) +pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x16()), + src.as_i16x16(), + )) + } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. 
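Illustrative sketch (editorial, not part of the patch; assumes x86_64 and std, helper names are made up): the practical effect of dropping `unsafe` from these signatures shows up at the call site. Inside a function that itself enables the required target feature, the intrinsic can now be called without an `unsafe` block; reaching that function from ordinary code still needs a runtime feature check plus `unsafe`, as before.

use core::arch::x86_64::*;

#[target_feature(enable = "avx512bitalg")]
fn popcnt_words(v: __m512i) -> __m512i {
    // Safe call: the enclosing function already guarantees AVX512BITALG.
    _mm512_popcnt_epi16(v)
}

fn popcnt_words_checked(v: __m512i) -> Option<__m512i> {
    if std::is_x86_feature_detected!("avx512bitalg") {
        // SAFETY: the required CPU feature was just detected at runtime.
        Some(unsafe { popcnt_words(v) })
    } else {
        None
    }
}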
@@ -137,8 +145,8 @@ pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) - #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i { - transmute(simd_ctpop(a.as_i16x8())) +pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i16x8())) } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -151,12 +159,14 @@ pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i16x8()), - i16x8::ZERO, - )) +pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x8()), + i16x8::ZERO, + )) + } } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -169,12 +179,14 @@ pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] -pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i16x8()), - src.as_i16x8(), - )) +pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x8()), + src.as_i16x8(), + )) + } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -184,8 +196,8 @@ pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __ #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i { - transmute(simd_ctpop(a.as_i8x64())) +pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i8x64())) } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -198,12 +210,14 @@ pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i { #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i8x64()), - i8x64::ZERO, - )) +pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x64()), + i8x64::ZERO, + )) + } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. 
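The mask_ and maskz_ pairs above differ only in what fills a lane whose mask bit is clear: the writemask form copies that lane from src, the zeromask form writes zero. A minimal scalar model of the 128-bit, 16-bit-lane case (editorial sketch, function names made up; lane i corresponds to mask bit i):

// Scalar model of _mm_mask_popcnt_epi16 / _mm_maskz_popcnt_epi16 semantics.
fn mask_popcnt_epi16_model(src: [i16; 8], k: u8, a: [i16; 8]) -> [i16; 8] {
    let mut dst = [0i16; 8];
    for i in 0..8 {
        dst[i] = if (k >> i) & 1 == 1 {
            (a[i] as u16).count_ones() as i16 // popcount of the lane's 16 bits
        } else {
            src[i] // writemask: keep the corresponding src lane
        };
    }
    dst
}

fn maskz_popcnt_epi16_model(k: u8, a: [i16; 8]) -> [i16; 8] {
    mask_popcnt_epi16_model([0; 8], k, a) // zeromask: cleared lanes become 0
}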
@@ -216,12 +230,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i8x64()), - src.as_i8x64(), - )) +pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x64()), + src.as_i8x64(), + )) + } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -231,8 +247,8 @@ pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i { - transmute(simd_ctpop(a.as_i8x32())) +pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i8x32())) } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -245,12 +261,14 @@ pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i8x32()), - i8x32::ZERO, - )) +pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x32()), + i8x32::ZERO, + )) + } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -263,12 +281,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i8x32()), - src.as_i8x32(), - )) +pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x32()), + src.as_i8x32(), + )) + } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -278,8 +298,8 @@ pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i { - transmute(simd_ctpop(a.as_i8x16())) +pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i8x16())) } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. 
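One place the byte-wide popcount is handy is a full 256-bit population count: count bits per byte, then let vpsadbw sum the bytes of each 64-bit lane. An editorial sketch, assuming the rest of this conversion has landed (so the AVX2 helpers used here are safe fns too); popcount_256 is a made-up name, and the code is x86_64-only because of _mm256_extract_epi64:

use core::arch::x86_64::*;

#[target_feature(enable = "avx2,avx512bitalg,avx512vl")]
fn popcount_256(v: __m256i) -> u64 {
    let per_byte = _mm256_popcnt_epi8(v); // every byte becomes a value in 0..=8
    // Sum the bytes of each 64-bit lane (absolute differences against zero).
    let lane_sums = _mm256_sad_epu8(per_byte, _mm256_setzero_si256());
    (_mm256_extract_epi64::<0>(lane_sums)
        + _mm256_extract_epi64::<1>(lane_sums)
        + _mm256_extract_epi64::<2>(lane_sums)
        + _mm256_extract_epi64::<3>(lane_sums)) as u64
}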
@@ -292,12 +312,14 @@ pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i8x16()), - i8x16::ZERO, - )) +pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x16()), + i8x16::ZERO, + )) + } } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -310,12 +332,14 @@ pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] -pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i8x16()), - src.as_i8x16(), - )) +pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x16()), + src.as_i8x16(), + )) + } } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. @@ -327,8 +351,8 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __ #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufbitqmb))] -pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 { - bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) +pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 { + unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) } } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. @@ -343,8 +367,8 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 #[target_feature(enable = "avx512bitalg")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufbitqmb))] -pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 { - bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) +pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 { + unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) } } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. @@ -356,8 +380,8 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufbitqmb))] -pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 { - bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) +pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 { + unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) } } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. 
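For the bit-shuffle mask intrinsics just above, each byte of `c` selects, via its low 6 bits, one bit of the corresponding 64-bit lane of `b`, and the selected bits are packed into the returned mask. A scalar model of the 128-bit form, written from that description and worth double-checking against the Intel pseudocode (editorial sketch; the wider forms just add more 64-bit lanes, and the mask_ variants additionally AND the result with k):

// Scalar model of _mm_bitshuffle_epi64_mask. Illustrative only.
fn bitshuffle_epi64_mask_model(b: [u64; 2], c: [u64; 2]) -> u16 {
    let mut k = 0u16;
    for lane in 0..2 {
        for byte in 0..8 {
            let sel = (c[lane] >> (8 * byte)) & 0x3F; // low 6 bits of this byte of c
            let bit = (b[lane] >> sel) & 1;           // the selected bit of b's 64-bit lane
            k |= (bit as u16) << (lane * 8 + byte);
        }
    }
    k
}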
@@ -372,8 +396,8 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufbitqmb))] -pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 { - bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) +pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 { + unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) } } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. @@ -385,8 +409,8 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufbitqmb))] -pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 { - bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) +pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 { + unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) } } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. @@ -401,8 +425,8 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bitalg,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufbitqmb))] -pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 { - bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) +pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 { + unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index caac75b346..11d1f93f37 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -16,10 +16,12 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i { - let a = a.as_i16x32(); - let cmp: i16x32 = simd_gt(a, i16x32::ZERO); - transmute(simd_select(cmp, a, simd_neg(a))) +pub fn _mm512_abs_epi16(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let cmp: i16x32 = simd_gt(a, i16x32::ZERO); + transmute(simd_select(cmp, a, simd_neg(a))) + } } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
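The abs implementations above pick between `a` and a wrapping negation with a lane-wise select, which also pins down the i16::MIN edge case: like the underlying vpabsw/vpabsb instructions, the most negative value comes back unchanged. Per-lane scalar equivalents (editorial sketch, helper names made up):

// Lane-wise equivalent of _mm512_abs_epi16 and friends.
fn abs_epi16_lane(x: i16) -> i16 {
    x.wrapping_abs() // (-32768i16).wrapping_abs() == -32768, matching vpabsw
}

// Writemask form, per lane: keep the src lane where the mask bit is clear.
fn mask_abs_epi16_lane(src: i16, k_bit: bool, x: i16) -> i16 {
    if k_bit { x.wrapping_abs() } else { src }
}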
@@ -29,9 +31,11 @@ pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, abs, src.as_i16x32())) +pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, abs, src.as_i16x32())) + } } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -41,9 +45,11 @@ pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, abs, i16x32::ZERO)) +pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, abs, i16x32::ZERO)) + } } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -53,9 +59,11 @@ pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, abs, src.as_i16x16())) +pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, abs, src.as_i16x16())) + } } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -65,9 +73,11 @@ pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, abs, i16x16::ZERO)) +pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, abs, i16x16::ZERO)) + } } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -77,9 +87,11 @@ pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let abs = _mm_abs_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, abs, src.as_i16x8())) +pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, abs, src.as_i16x8())) + } } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -89,9 +101,11 @@ pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsw))] -pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { - let abs = _mm_abs_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, abs, i16x8::ZERO)) +pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, abs, i16x8::ZERO)) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst. @@ -101,10 +115,12 @@ pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i { - let a = a.as_i8x64(); - let cmp: i8x64 = simd_gt(a, i8x64::ZERO); - transmute(simd_select(cmp, a, simd_neg(a))) +pub fn _mm512_abs_epi8(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let cmp: i8x64 = simd_gt(a, i8x64::ZERO); + transmute(simd_select(cmp, a, simd_neg(a))) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -114,9 +130,11 @@ pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi8(a).as_i8x64(); - transmute(simd_select_bitmask(k, abs, src.as_i8x64())) +pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, abs, src.as_i8x64())) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -126,9 +144,11 @@ pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi8(a).as_i8x64(); - transmute(simd_select_bitmask(k, abs, i8x64::ZERO)) +pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, abs, i8x64::ZERO)) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -138,9 +158,11 @@ pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, abs, src.as_i8x32())) +pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, abs, src.as_i8x32())) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -150,9 +172,11 @@ pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, abs, i8x32::ZERO)) +pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, abs, i8x32::ZERO)) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set) @@ -162,9 +186,11 @@ pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { - let abs = _mm_abs_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, abs, src.as_i8x16())) +pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, abs, src.as_i8x16())) + } } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
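The unmasked `_mm512_abs_epi8`/`_mm512_abs_epi16` bodies above build absolute value from a compare/select/negate sequence rather than calling an `abs` helper. One lane of that sequence, sketched in plain Rust on the assumption that `simd_neg` wraps on overflow the way `wrapping_neg` does:

// Mirrors `simd_select(simd_gt(a, 0), a, simd_neg(a))` for a single i8 lane.
// Wrapping negation leaves i8::MIN as 0x80 instead of panicking, which also
// appears to be what vpabsb reports for that lane.
fn abs_epi8_lane(x: i8) -> i8 {
    if x > 0 { x } else { x.wrapping_neg() }
}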
@@ -174,9 +200,11 @@ pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsb))] -pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i { - let abs = _mm_abs_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, abs, i8x16::ZERO)) +pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, abs, i8x16::ZERO)) + } } /// Add packed 16-bit integers in a and b, and store the results in dst. @@ -186,8 +214,8 @@ pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_add(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) } } /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -197,9 +225,11 @@ pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, add, src.as_i16x32())) +pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, src.as_i16x32())) + } } /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -209,9 +239,11 @@ pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, add, i16x32::ZERO)) +pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, i16x32::ZERO)) + } } /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -221,9 +253,11 @@ pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, add, src.as_i16x16())) +pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, src.as_i16x16())) + } } /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -233,9 +267,11 @@ pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, add, i16x16::ZERO)) +pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, i16x16::ZERO)) + } } /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -245,9 +281,11 @@ pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, add, src.as_i16x8())) +pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, src.as_i16x8())) + } } /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -257,9 +295,11 @@ pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddw))] -pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, add, i16x8::ZERO)) +pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, i16x8::ZERO)) + } } /// Add packed 8-bit integers in a and b, and store the results in dst. 
@@ -269,8 +309,8 @@ pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_add(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) } } /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -280,9 +320,11 @@ pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, add, src.as_i8x64())) +pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, src.as_i8x64())) + } } /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -292,9 +334,11 @@ pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, add, i8x64::ZERO)) +pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, i8x64::ZERO)) + } } /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -304,9 +348,11 @@ pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, add, src.as_i8x32())) +pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, src.as_i8x32())) + } } /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
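With `_mm512_add_epi16` and its masked forms now being safe `#[target_feature]` functions, a wrapper that enables the same feature can call them without any `unsafe` block. A rough usage sketch, assuming a nightly toolchain (the intrinsics above are still gated on the unstable `stdarch_x86_avx512` feature) and the target-feature 1.1 calling rules; `masked_add_words` is a hypothetical name, not part of this diff:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

// Calling the intrinsic here is safe because this function enables the same
// target feature the intrinsic requires; no unsafe block is needed for the
// call itself.
#[target_feature(enable = "avx512bw")]
fn masked_add_words(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    _mm512_mask_add_epi16(src, k, a, b)
}

// Callers that do not themselves enable avx512bw still reach this wrapper
// through an unsafe block, typically guarded by
// is_x86_feature_detected!("avx512bw"), exactly as before the change.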
@@ -316,9 +362,11 @@ pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, add, i8x32::ZERO)) +pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, i8x32::ZERO)) + } } /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -328,9 +376,11 @@ pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, add, src.as_i8x16())) +pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, src.as_i8x16())) + } } /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -340,9 +390,11 @@ pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddb))] -pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, add, i8x16::ZERO)) +pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, i8x16::ZERO)) + } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst. @@ -352,8 +404,8 @@ pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -363,14 +415,11 @@ pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm512_mask_adds_epu16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let add = _mm512_adds_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, add, src.as_u16x32())) +pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, add, src.as_u16x32())) + } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -380,9 +429,11 @@ pub unsafe fn _mm512_mask_adds_epu16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_adds_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, add, u16x32::ZERO)) +pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, add, u16x32::ZERO)) + } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -392,14 +443,11 @@ pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm256_mask_adds_epu16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let add = _mm256_adds_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, add, src.as_u16x16())) +pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, add, src.as_u16x16())) + } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -409,9 +457,11 @@ pub unsafe fn _mm256_mask_adds_epu16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_adds_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, add, u16x16::ZERO)) +pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, add, u16x16::ZERO)) + } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -421,9 +471,11 @@ pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, add, src.as_u16x8())) +pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, add, src.as_u16x8())) + } } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -433,9 +485,11 @@ pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusw))] -pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, add, u16x8::ZERO)) +pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, add, u16x8::ZERO)) + } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst. @@ -445,8 +499,8 @@ pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -456,9 +510,11 @@ pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_adds_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, add, src.as_u8x64())) +pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, add, src.as_u8x64())) + } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
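The `vpaddusw` hunks lower to `simd_saturating_add` on unsigned 16-bit lanes, so per lane the operation is ordinary `saturating_add`: the sum clamps at the type's maximum instead of wrapping. A one-lane sketch:

// One u16 lane of _mm512_adds_epu16: the sum clamps at u16::MAX.
fn adds_epu16_lane(a: u16, b: u16) -> u16 {
    a.saturating_add(b)
}
// e.g. adds_epu16_lane(u16::MAX - 1, 10) == u16::MAX, adds_epu16_lane(3, 4) == 7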
@@ -468,9 +524,11 @@ pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_adds_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, add, u8x64::ZERO)) +pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, add, u8x64::ZERO)) + } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -480,9 +538,11 @@ pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_adds_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, add, src.as_u8x32())) +pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, add, src.as_u8x32())) + } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -492,9 +552,11 @@ pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_adds_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, add, u8x32::ZERO)) +pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, add, u8x32::ZERO)) + } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -504,9 +566,11 @@ pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, add, src.as_u8x16())) +pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, add, src.as_u8x16())) + } } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -516,9 +580,11 @@ pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddusb))] -pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, add, u8x16::ZERO)) +pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, add, u8x16::ZERO)) + } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst. @@ -528,8 +594,8 @@ pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -539,14 +605,11 @@ pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm512_mask_adds_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let add = _mm512_adds_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, add, src.as_i16x32())) +pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, src.as_i16x32())) + } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -556,9 +619,11 @@ pub unsafe fn _mm512_mask_adds_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_adds_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, add, i16x32::ZERO)) +pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, add, i16x32::ZERO)) + } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -568,14 +633,11 @@ pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm256_mask_adds_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let add = _mm256_adds_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, add, src.as_i16x16())) +pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, src.as_i16x16())) + } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -585,9 +647,11 @@ pub unsafe fn _mm256_mask_adds_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_adds_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, add, i16x16::ZERO)) +pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, add, i16x16::ZERO)) + } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -597,9 +661,11 @@ pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, add, src.as_i16x8())) +pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, src.as_i16x8())) + } } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -609,9 +675,11 @@ pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsw))] -pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, add, i16x8::ZERO)) +pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, add, i16x8::ZERO)) + } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst. 
@@ -621,8 +689,8 @@ pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -632,9 +700,11 @@ pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_adds_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, add, src.as_i8x64())) +pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, src.as_i8x64())) + } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -644,9 +714,11 @@ pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_adds_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, add, i8x64::ZERO)) +pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_adds_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, add, i8x64::ZERO)) + } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -656,9 +728,11 @@ pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_adds_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, add, src.as_i8x32())) +pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, src.as_i8x32())) + } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
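Putting the two pieces together, the `_mask_adds_` wrappers are nothing more than the saturating operation followed by the writemask select. A lane-wise model of `_mm512_mask_adds_epi16`, which also shows that the signed variants clamp at both `i16::MIN` and `i16::MAX`:

// Lane-wise model of _mm512_mask_adds_epi16: saturating add on every lane,
// then the writemask decides which lanes survive and which keep `src` — the
// same shape as `simd_select_bitmask(k, add, src)` in the hunks above.
fn mask_adds_epi16(src: [i16; 32], k: u32, a: [i16; 32], b: [i16; 32]) -> [i16; 32] {
    let mut out = src;
    for i in 0..32 {
        if (k >> i) & 1 == 1 {
            out[i] = a[i].saturating_add(b[i]);
        }
    }
    out
}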
@@ -668,9 +742,11 @@ pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_adds_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, add, i8x32::ZERO)) +pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_adds_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, add, i8x32::ZERO)) + } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -680,9 +756,11 @@ pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, add, src.as_i8x16())) +pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, src.as_i8x16())) + } } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -692,9 +770,11 @@ pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddsb))] -pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_adds_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, add, i8x16::ZERO)) +pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_adds_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, add, i8x16::ZERO)) + } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst. @@ -704,8 +784,8 @@ pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -715,9 +795,11 @@ pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, sub, src.as_i16x32())) +pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, src.as_i16x32())) + } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -727,9 +809,11 @@ pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) +pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) + } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -739,9 +823,11 @@ pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, sub, src.as_i16x16())) +pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, src.as_i16x16())) + } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -751,9 +837,11 @@ pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) +pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) + } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -763,9 +851,11 @@ pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, sub, src.as_i16x8())) +pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, src.as_i16x8())) + } } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -775,9 +865,11 @@ pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubw))] -pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) +pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) + } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst. @@ -787,8 +879,8 @@ pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -798,9 +890,11 @@ pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, sub, src.as_i8x64())) +pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, src.as_i8x64())) + } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -810,9 +904,11 @@ pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) +pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) + } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -822,9 +918,11 @@ pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, sub, src.as_i8x32())) +pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, src.as_i8x32())) + } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -834,9 +932,11 @@ pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) +pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) + } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -846,9 +946,11 @@ pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, sub, src.as_i8x16())) +pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, src.as_i8x16())) + } } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -858,9 +960,11 @@ pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubb))] -pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) +pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) + } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst. @@ -870,8 +974,8 @@ pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -881,14 +985,11 @@ pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm512_mask_subs_epu16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let sub = _mm512_subs_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, sub, src.as_u16x32())) +pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, sub, src.as_u16x32())) + } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -898,9 +999,11 @@ pub unsafe fn _mm512_mask_subs_epu16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_subs_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, sub, u16x32::ZERO)) +pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, sub, u16x32::ZERO)) + } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
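For the unsigned subtractions (`vpsubusw`), `simd_saturating_sub` floors each lane at zero rather than wrapping around. One lane of `_mm512_subs_epu16`:

// One u16 lane of _mm512_subs_epu16: the difference clamps at zero.
fn subs_epu16_lane(a: u16, b: u16) -> u16 {
    a.saturating_sub(b)
}
// e.g. subs_epu16_lane(3, 10) == 0, subs_epu16_lane(10, 3) == 7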
@@ -910,14 +1013,11 @@ pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm256_mask_subs_epu16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let sub = _mm256_subs_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, sub, src.as_u16x16())) +pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, sub, src.as_u16x16())) + } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -927,9 +1027,11 @@ pub unsafe fn _mm256_mask_subs_epu16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_subs_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, sub, u16x16::ZERO)) +pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, sub, u16x16::ZERO)) + } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -939,9 +1041,11 @@ pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, sub, src.as_u16x8())) +pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, sub, src.as_u16x8())) + } } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -951,9 +1055,11 @@ pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusw))] -pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, sub, u16x8::ZERO)) +pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, sub, u16x8::ZERO)) + } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst. 
@@ -963,8 +1069,8 @@ pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -974,9 +1080,11 @@ pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_subs_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, sub, src.as_u8x64())) +pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, sub, src.as_u8x64())) + } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -986,9 +1094,11 @@ pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_subs_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, sub, u8x64::ZERO)) +pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, sub, u8x64::ZERO)) + } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -998,9 +1108,11 @@ pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_subs_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, sub, src.as_u8x32())) +pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, sub, src.as_u8x32())) + } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1010,9 +1122,11 @@ pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_subs_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, sub, u8x32::ZERO)) +pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, sub, u8x32::ZERO)) + } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1022,9 +1136,11 @@ pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, sub, src.as_u8x16())) +pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, sub, src.as_u8x16())) + } } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1034,9 +1150,11 @@ pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubusb))] -pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, sub, u8x16::ZERO)) +pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, sub, u8x16::ZERO)) + } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst. @@ -1046,8 +1164,8 @@ pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1057,14 +1175,11 @@ pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm512_mask_subs_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let sub = _mm512_subs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, sub, src.as_i16x32())) +pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, src.as_i16x32())) + } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1074,9 +1189,11 @@ pub unsafe fn _mm512_mask_subs_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_subs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) +pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) + } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1086,14 +1203,11 @@ pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm256_mask_subs_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let sub = _mm256_subs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, sub, src.as_i16x16())) +pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, src.as_i16x16())) + } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1103,9 +1217,11 @@ pub unsafe fn _mm256_mask_subs_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_subs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) +pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) + } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1115,9 +1231,11 @@ pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, sub, src.as_i16x8())) +pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, src.as_i16x8())) + } } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1127,9 +1245,11 @@ pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsw))] -pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) +pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) + } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst. @@ -1139,8 +1259,8 @@ pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1150,9 +1270,11 @@ pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_subs_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, sub, src.as_i8x64())) +pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, src.as_i8x64())) + } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
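Worked example (illustrative, not from the patch): the signed saturating variants clamp to the `i16` range instead of wrapping, which is the only behavioral difference from a plain subtraction.

// Scalar model of one lane of _mm512_subs_epi16 (sketch, not the implementation):
fn subs_i16_lane(a: i16, b: i16) -> i16 {
    // -32000 - 2000 would wrap to 31536 with wrapping arithmetic;
    // saturating arithmetic clamps it to i16::MIN (-32768) instead.
    a.saturating_sub(b)
}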
@@ -1162,9 +1284,11 @@ pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_subs_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) +pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_subs_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) + } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1174,9 +1298,11 @@ pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_subs_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, sub, src.as_i8x32())) +pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, src.as_i8x32())) + } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1186,9 +1312,11 @@ pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_subs_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) +pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_subs_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) + } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1198,9 +1326,11 @@ pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, sub, src.as_i8x16())) +pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, src.as_i8x16())) + } } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1210,9 +1340,11 @@ pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubsb))] -pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_subs_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) +pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_subs_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. @@ -1222,11 +1354,13 @@ pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i { - let a = simd_cast::<_, u32x32>(a.as_u16x32()); - let b = simd_cast::<_, u32x32>(b.as_u16x32()); - let r = simd_shr(simd_mul(a, b), u32x32::splat(16)); - transmute(simd_cast::(r)) +pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, u32x32>(a.as_u16x32()); + let b = simd_cast::<_, u32x32>(b.as_u16x32()); + let r = simd_shr(simd_mul(a, b), u32x32::splat(16)); + transmute(simd_cast::(r)) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1236,14 +1370,11 @@ pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm512_mask_mulhi_epu16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, mul, src.as_u16x32())) +pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, mul, src.as_u16x32())) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1253,9 +1384,11 @@ pub unsafe fn _mm512_mask_mulhi_epu16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, mul, u16x32::ZERO)) +pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, mul, u16x32::ZERO)) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1265,14 +1398,11 @@ pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm256_mask_mulhi_epu16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, mul, src.as_u16x16())) +pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, mul, src.as_u16x16())) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1282,9 +1412,11 @@ pub unsafe fn _mm256_mask_mulhi_epu16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, mul, u16x16::ZERO)) +pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, mul, u16x16::ZERO)) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1294,9 +1426,11 @@ pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mulhi_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, mul, src.as_u16x8())) +pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, mul, src.as_u16x8())) + } } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1306,9 +1440,11 @@ pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhuw))] -pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mulhi_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, mul, u16x8::ZERO)) +pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, mul, u16x8::ZERO)) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. @@ -1318,11 +1454,13 @@ pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i { - let a = simd_cast::<_, i32x32>(a.as_i16x32()); - let b = simd_cast::<_, i32x32>(b.as_i16x32()); - let r = simd_shr(simd_mul(a, b), i32x32::splat(16)); - transmute(simd_cast::(r)) +pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, i32x32>(a.as_i16x32()); + let b = simd_cast::<_, i32x32>(b.as_i16x32()); + let r = simd_shr(simd_mul(a, b), i32x32::splat(16)); + transmute(simd_cast::(r)) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
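Worked example (illustrative): the unsigned high-half multiply returns bits [31:16] of the full 32-bit product, which is what the widen-multiply-shift sequence above computes per lane.

// Scalar model of one lane of _mm512_mulhi_epu16 (sketch):
fn mulhi_u16_lane(a: u16, b: u16) -> u16 {
    ((a as u32 * b as u32) >> 16) as u16
}
// e.g. 40000 * 40000 = 1_600_000_000 = 0x5F5E_1000, so the high half is 0x5F5E (24414).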
@@ -1332,14 +1470,11 @@ pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm512_mask_mulhi_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, mul, src.as_i16x32())) +pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, src.as_i16x32())) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1349,9 +1484,11 @@ pub unsafe fn _mm512_mask_mulhi_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) +pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1361,14 +1498,11 @@ pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm256_mask_mulhi_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, mul, src.as_i16x16())) +pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, src.as_i16x16())) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1378,9 +1512,11 @@ pub unsafe fn _mm256_mask_mulhi_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) +pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1390,9 +1526,11 @@ pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mulhi_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, mul, src.as_i16x8())) +pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, src.as_i16x8())) + } } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1402,9 +1540,11 @@ pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhw))] -pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mulhi_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) +pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst. @@ -1414,8 +1554,8 @@ pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1425,14 +1565,11 @@ pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm512_mask_mulhrs_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, mul, src.as_i16x32())) +pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, src.as_i16x32())) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1442,9 +1579,11 @@ pub unsafe fn _mm512_mask_mulhrs_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) +pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1454,14 +1593,11 @@ pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm256_mask_mulhrs_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, mul, src.as_i16x16())) +pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, src.as_i16x16())) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1471,9 +1607,11 @@ pub unsafe fn _mm256_mask_mulhrs_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) +pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1483,9 +1621,11 @@ pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, mul, src.as_i16x8())) +pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, src.as_i16x8())) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1495,9 +1635,11 @@ pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulhrsw))] -pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) +pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) + } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst. @@ -1507,8 +1649,8 @@ pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
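Worked example (illustrative): the "truncate to the 18 most significant bits, add 1, keep bits [16:1]" description of `vpmulhrsw` above is equivalent to `((a * b) >> 14 + 1) >> 1` per lane, i.e. a Q15 fixed-point multiply with rounding.

// Scalar model of one lane of _mm512_mulhrs_epi16 (sketch):
fn mulhrs_i16_lane(a: i16, b: i16) -> i16 {
    let tmp = ((a as i32 * b as i32) >> 14) + 1;
    (tmp >> 1) as i16
}
// e.g. 0x4000 * 0x4000 (0.5 * 0.5 in Q15): product = 0x1000_0000,
// >> 14 = 0x4000, + 1 = 0x4001, >> 1 = 0x2000 = 8192 (0.25 in Q15).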
@@ -1518,14 +1660,11 @@ pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm512_mask_mullo_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let mul = _mm512_mullo_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, mul, src.as_i16x32())) +pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, src.as_i16x32())) + } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1535,9 +1674,11 @@ pub unsafe fn _mm512_mask_mullo_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mullo_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) +pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) + } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1547,14 +1688,11 @@ pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm256_mask_mullo_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let mul = _mm256_mullo_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, mul, src.as_i16x16())) +pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, src.as_i16x16())) + } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1564,9 +1702,11 @@ pub unsafe fn _mm256_mask_mullo_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mullo_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) +pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) + } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1576,9 +1716,11 @@ pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mullo_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, mul, src.as_i16x8())) +pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, src.as_i16x8())) + } } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1588,9 +1730,11 @@ pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmullw))] -pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mullo_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) +pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst. @@ -1600,10 +1744,12 @@ pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u16x32(); - let b = b.as_u16x32(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u16x32(); + let b = b.as_u16x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
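Illustrative note: `mullo` keeps the low 16 bits of the same widening product whose high half the `mulhi` variants return; the low half has the same bit pattern whether the product is computed signed or unsigned.

// Scalar model (sketch): low and high halves of the widening product.
fn mul_halves(a: u16, b: u16) -> (u16, u16) {
    let full = a as u32 * b as u32;
    (full as u16, (full >> 16) as u16) // (mullo lane, mulhi lane)
}
// e.g. 40000 * 40000 = 0x5F5E_1000 -> low half 0x1000 (4096), high half 0x5F5E (24414).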
@@ -1613,9 +1759,11 @@ pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, max, src.as_u16x32())) +pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, max, src.as_u16x32())) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1625,9 +1773,11 @@ pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, max, u16x32::ZERO)) +pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, max, u16x32::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1637,9 +1787,11 @@ pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, max, src.as_u16x16())) +pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, max, src.as_u16x16())) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1649,9 +1801,11 @@ pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, max, u16x16::ZERO)) +pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, max, u16x16::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1661,9 +1815,11 @@ pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, max, src.as_u16x8())) +pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, max, src.as_u16x8())) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1673,9 +1829,11 @@ pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuw))] -pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, max, u16x8::ZERO)) +pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, max, u16x8::ZERO)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst. @@ -1685,10 +1843,12 @@ pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u8x64(); - let b = b.as_u8x64(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u8x64(); + let b = b.as_u8x64(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1698,9 +1858,11 @@ pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, max, src.as_u8x64())) +pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, max, src.as_u8x64())) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1710,9 +1872,11 @@ pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, max, u8x64::ZERO)) +pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, max, u8x64::ZERO)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1722,9 +1886,11 @@ pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, max, src.as_u8x32())) +pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, max, src.as_u8x32())) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1734,9 +1900,11 @@ pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, max, u8x32::ZERO)) +pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, max, u8x32::ZERO)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1746,9 +1914,11 @@ pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, max, src.as_u8x16())) +pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, max, src.as_u8x16())) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1758,9 +1928,11 @@ pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxub))] -pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, max, u8x16::ZERO)) +pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, max, u8x16::ZERO)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst. @@ -1770,10 +1942,12 @@ pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i16x32(); - let b = b.as_i16x32(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1783,9 +1957,11 @@ pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, max, src.as_i16x32())) +pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, max, src.as_i16x32())) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1795,9 +1971,11 @@ pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, max, i16x32::ZERO)) +pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, max, i16x32::ZERO)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1807,9 +1985,11 @@ pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, max, src.as_i16x16())) +pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, max, src.as_i16x16())) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1819,9 +1999,11 @@ pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, max, i16x16::ZERO)) +pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, max, i16x16::ZERO)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1831,9 +2013,11 @@ pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, max, src.as_i16x8())) +pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, max, src.as_i16x8())) + } } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1843,9 +2027,11 @@ pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsw))] -pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, max, i16x8::ZERO)) +pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, max, i16x8::ZERO)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst. 
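Illustrative note: the `epu*` and `epi*` maxima differ only in how the lane bits are interpreted, so the same bit pattern can land on opposite sides of the comparison.

// Sketch: 0x8000 is 32768 as u16 but -32768 as i16, so the two maxima disagree.
fn max_lanes(x: u16, y: u16) -> (u16, i16) {
    let unsigned_max = x.max(y);               // scalar model of a _mm*_max_epu16 lane
    let signed_max = (x as i16).max(y as i16); // scalar model of a _mm*_max_epi16 lane
    (unsigned_max, signed_max)
}
// max_lanes(0x8000, 1) == (0x8000, 1): unsigned picks 32768, signed picks 1.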
@@ -1855,10 +2041,12 @@ pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i8x64(); - let b = b.as_i8x64(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1868,9 +2056,11 @@ pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, max, src.as_i8x64())) +pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, max, src.as_i8x64())) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1880,9 +2070,11 @@ pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, max, i8x64::ZERO)) +pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, max, i8x64::ZERO)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1892,9 +2084,11 @@ pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, max, src.as_i8x32())) +pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, max, src.as_i8x32())) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1904,9 +2098,11 @@ pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, max, i8x32::ZERO)) +pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, max, i8x32::ZERO)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1916,9 +2112,11 @@ pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, max, src.as_i8x16())) +pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, max, src.as_i8x16())) + } } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1928,9 +2126,11 @@ pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsb))] -pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, max, i8x16::ZERO)) +pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, max, i8x16::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst. @@ -1940,10 +2140,12 @@ pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u16x32(); - let b = b.as_u16x32(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u16x32(); + let b = b.as_u16x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1953,9 +2155,11 @@ pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, min, src.as_u16x32())) +pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, min, src.as_u16x32())) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1965,9 +2169,11 @@ pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, min, u16x32::ZERO)) +pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, min, u16x32::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1977,9 +2183,11 @@ pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, min, src.as_u16x16())) +pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, min, src.as_u16x16())) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1989,9 +2197,11 @@ pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, min, u16x16::ZERO)) +pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, min, u16x16::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2001,9 +2211,11 @@ pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, min, src.as_u16x8())) +pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, min, src.as_u16x8())) + } } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2013,9 +2225,11 @@ pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuw))] -pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, min, u16x8::ZERO)) +pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, min, u16x8::ZERO)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst. @@ -2025,10 +2239,12 @@ pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u8x64(); - let b = b.as_u8x64(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u8x64(); + let b = b.as_u8x64(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2038,9 +2254,11 @@ pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, min, src.as_u8x64())) +pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, min, src.as_u8x64())) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
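The point of the signature change is visible at the call site: a caller that already enables the feature can now use the intrinsic without an `unsafe` block. A minimal sketch, assuming a nightly toolchain with `#![feature(stdarch_x86_avx512)]` since these intrinsics are still unstable; `min_bytes` is a hypothetical helper, not part of the patch:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw")]
fn min_bytes(a: core::arch::x86_64::__m512i, b: core::arch::x86_64::__m512i) -> core::arch::x86_64::__m512i {
    // Safe call: this function already guarantees avx512bw, and after this patch
    // `_mm512_min_epu8` is a safe `pub fn` rather than an `unsafe fn`.
    core::arch::x86_64::_mm512_min_epu8(a, b)
}
```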
@@ -2050,9 +2268,11 @@ pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, min, u8x64::ZERO)) +pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, min, u8x64::ZERO)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2062,9 +2282,11 @@ pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, min, src.as_u8x32())) +pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, min, src.as_u8x32())) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2074,9 +2296,11 @@ pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, min, u8x32::ZERO)) +pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, min, u8x32::ZERO)) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2086,9 +2310,11 @@ pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, min, src.as_u8x16())) +pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, min, src.as_u8x16())) + } } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2098,9 +2324,11 @@ pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminub))] -pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, min, u8x16::ZERO)) +pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, min, u8x16::ZERO)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst. @@ -2110,10 +2338,12 @@ pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i16x32(); - let b = b.as_i16x32(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2123,9 +2353,11 @@ pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, min, src.as_i16x32())) +pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, min, src.as_i16x32())) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2135,9 +2367,11 @@ pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, min, i16x32::ZERO)) +pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, min, i16x32::ZERO)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2147,9 +2381,11 @@ pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, min, src.as_i16x16())) +pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, min, src.as_i16x16())) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2159,9 +2395,11 @@ pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, min, i16x16::ZERO)) +pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, min, i16x16::ZERO)) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2171,9 +2409,11 @@ pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, min, src.as_i16x8())) +pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, min, src.as_i16x8())) + } } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2183,9 +2423,11 @@ pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsw))] -pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, min, i16x8::ZERO)) +pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, min, i16x8::ZERO)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst. 
@@ -2195,10 +2437,12 @@ pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i8x64(); - let b = b.as_i8x64(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2208,9 +2452,11 @@ pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, min, src.as_i8x64())) +pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, min, src.as_i8x64())) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2220,9 +2466,11 @@ pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, min, i8x64::ZERO)) +pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, min, i8x64::ZERO)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2232,9 +2480,11 @@ pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, min, src.as_i8x32())) +pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, min, src.as_i8x32())) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2244,9 +2494,11 @@ pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, min, i8x32::ZERO)) +pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, min, i8x32::ZERO)) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2256,9 +2508,11 @@ pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, min, src.as_i8x16())) +pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, min, src.as_i8x16())) + } } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2268,9 +2522,11 @@ pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsb))] -pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, min, i8x16::ZERO)) +pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, min, i8x16::ZERO)) + } } /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. @@ -2280,8 +2536,8 @@ pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_lt(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_u16x32(), b.as_u16x32())) } } /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2291,7 +2547,7 @@ pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2302,8 +2558,8 @@ pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
-    simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16()))
+pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
 }
 
 /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -2313,7 +2569,7 @@ pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2324,8 +2580,8 @@ pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
-    simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8()))
+pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
 }
 
 /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
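For the `cmp*_mask` family that starts here, the returned `__mmask` value packs one bit per lane, with lane 0 in bit 0. A scalar model of the 512-bit unsigned 16-bit case (illustration only, not code from this patch):

```rust
// Scalar model of the bitmask produced by `simd_bitmask` for 32 unsigned 16-bit lanes.
fn cmplt_epu16_mask_model(a: [u16; 32], b: [u16; 32]) -> u32 {
    let mut k = 0u32;
    for i in 0..32 {
        if a[i] < b[i] {
            k |= 1 << i; // lane i compares true -> bit i of the mask is set
        }
    }
    k
}
```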
@@ -2335,7 +2591,7 @@ pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2346,8 +2602,8 @@ pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_lt(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_lt(a.as_u8x64(), b.as_u8x64())) } } /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2357,7 +2613,7 @@ pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2368,8 +2624,8 @@ pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_lt(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_u8x32(), b.as_u8x32())) } } /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2379,7 +2635,7 @@ pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2390,8 +2646,8 @@ pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_lt(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_u8x16(), b.as_u8x16())) } } /// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2401,7 +2657,7 @@ pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2412,8 +2668,8 @@ pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_lt(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_i16x32(), b.as_i16x32())) } } /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2423,7 +2679,7 @@ pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2434,8 +2690,8 @@ pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_lt(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_i16x16(), b.as_i16x16())) } } /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2445,7 +2701,7 @@ pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2456,8 +2712,8 @@ pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_lt(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_i16x8(), b.as_i16x8())) } } /// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2467,7 +2723,7 @@ pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2478,8 +2734,8 @@ pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_lt(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_lt(a.as_i8x64(), b.as_i8x64())) } } /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2489,7 +2745,7 @@ pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2500,8 +2756,8 @@ pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_lt(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_lt(a.as_i8x32(), b.as_i8x32())) } } /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
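The `mask_cmp*` wrappers above forward to the corresponding `*_mask_cmp_*_mask::<_MM_CMPINT_LT>` intrinsic, which applies `k1` as a zeromask to the comparison result, so the returned mask is simply the plain comparison mask ANDed with `k1`. A scalar sketch (illustration only, not code from this patch):

```rust
// Bit i is set only if bit i of k1 is set and lane i of `a` is less than lane i of `b`.
fn mask_cmplt_epi16_mask_model(k1: u32, a: [i16; 32], b: [i16; 32]) -> u32 {
    let mut k = 0u32;
    for i in 0..32 {
        if a[i] < b[i] {
            k |= 1 << i;
        }
    }
    k1 & k
}
```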
@@ -2511,7 +2767,7 @@ pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2522,8 +2778,8 @@ pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_lt(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_lt(a.as_i8x16(), b.as_i8x16())) } } /// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2533,7 +2789,7 @@ pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -2544,8 +2800,8 @@ pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_gt(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_u16x32(), b.as_u16x32())) } } /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2555,7 +2811,7 @@ pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2566,8 +2822,8 @@ pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_gt(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_u16x16(), b.as_u16x16())) } } /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2577,7 +2833,7 @@ pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2588,8 +2844,8 @@ pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_gt(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_u16x8(), b.as_u16x8())) } } /// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2599,7 +2855,7 @@ pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2610,8 +2866,8 @@ pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_gt(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_gt(a.as_u8x64(), b.as_u8x64())) } } /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2621,7 +2877,7 @@ pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2632,8 +2888,8 @@ pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_gt(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_u8x32(), b.as_u8x32())) } } /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2643,7 +2899,7 @@ pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2654,8 +2910,8 @@ pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_gt(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_u8x16(), b.as_u8x16())) } } /// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2665,7 +2921,7 @@ pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2676,8 +2932,8 @@ pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_gt(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_i16x32(), b.as_i16x32())) } } /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2687,7 +2943,7 @@ pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2698,8 +2954,8 @@ pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_gt(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_i16x16(), b.as_i16x16())) } } /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2709,7 +2965,7 @@ pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2720,8 +2976,8 @@ pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_gt(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_i16x8(), b.as_i16x8())) } } /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2731,7 +2987,7 @@ pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2742,8 +2998,8 @@ pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_gt(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_gt(a.as_i8x64(), b.as_i8x64())) } } /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2753,7 +3009,7 @@ pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2764,8 +3020,8 @@ pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_gt(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_gt(a.as_i8x32(), b.as_i8x32())) } } /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
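Dropping `unsafe` from these signatures does not remove the caller's obligation: from code that does not itself enable avx512bw, calling a `#[target_feature]` function is still an unsafe operation and is typically guarded by runtime detection. A sketch assuming a nightly toolchain with `#![feature(stdarch_x86_avx512)]`; `gt_mask` is a hypothetical helper, not part of the patch:

```rust
#[cfg(target_arch = "x86_64")]
fn gt_mask(a: core::arch::x86_64::__m512i, b: core::arch::x86_64::__m512i) -> Option<u64> {
    if is_x86_feature_detected!("avx512bw") {
        // SAFETY: avx512bw support was just detected at runtime, so calling this
        // `#[target_feature(enable = "avx512bw")]` function here is sound.
        Some(unsafe { core::arch::x86_64::_mm512_cmpgt_epi8_mask(a, b) })
    } else {
        None
    }
}
```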
@@ -2775,7 +3031,7 @@ pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2786,8 +3042,8 @@ pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_gt(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_i8x16(), b.as_i8x16())) } } /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2797,7 +3053,7 @@ pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -2808,8 +3064,8 @@ pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_le(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_u16x32(), b.as_u16x32())) } } /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2819,7 +3075,7 @@ pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2830,8 +3086,8 @@ pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_le(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_u16x16(), b.as_u16x16())) } } /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2841,7 +3097,7 @@ pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2852,8 +3108,8 @@ pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_le(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_u16x8(), b.as_u16x8())) } } /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2863,7 +3119,7 @@ pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2874,8 +3130,8 @@ pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_le(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_le(a.as_u8x64(), b.as_u8x64())) } } /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2885,7 +3141,7 @@ pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2896,8 +3152,8 @@ pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_le(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_u8x32(), b.as_u8x32())) } } /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2907,7 +3163,7 @@ pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2918,8 +3174,8 @@ pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_le(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_u8x16(), b.as_u8x16())) } } /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2929,7 +3185,7 @@ pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2940,8 +3196,8 @@ pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_le(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_i16x32(), b.as_i16x32())) } } /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2951,7 +3207,7 @@ pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2962,8 +3218,8 @@ pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_le(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_i16x16(), b.as_i16x16())) } } /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -2973,7 +3229,7 @@ pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -2984,8 +3240,8 @@ pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_le(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_i16x8(), b.as_i16x8())) } } /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2995,7 +3251,7 @@ pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -3006,8 +3262,8 @@ pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_le(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_le(a.as_i8x64(), b.as_i8x64())) } } /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3017,7 +3273,7 @@ pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -3028,8 +3284,8 @@ pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_le(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_le(a.as_i8x32(), b.as_i8x32())) } } /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3039,7 +3295,7 @@ pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -3050,8 +3306,8 @@ pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_le(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_i8x16(), b.as_i8x16())) } } /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3061,7 +3317,7 @@ pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -3072,8 +3328,8 @@ pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_ge(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_u16x32(), b.as_u16x32())) } } /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
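Because the mask types returned above are plain integers (a `__mmask16` is a `u16`), lane-wise reductions over a comparison are ordinary integer operations. A small illustrative helper (ours, not from the crate; same nightly/unstable-feature assumption as the sketch earlier):

use std::arch::x86_64::*;

// Counts how many 8-bit lanes of `a` are <= the corresponding lane of `b`,
// treating lanes as signed: one mask bit per lane, then a popcount.
#[target_feature(enable = "avx512bw,avx512vl")]
fn lanes_le(a: __m128i, b: __m128i) -> u32 {
    _mm_cmple_epi8_mask(a, b).count_ones()
}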
@@ -3083,7 +3339,7 @@ pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3094,8 +3350,8 @@ pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_ge(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_u16x16(), b.as_u16x16())) } } /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3105,7 +3361,7 @@ pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3116,8 +3372,8 @@ pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ge(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_u16x8(), b.as_u16x8())) } } /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3127,7 +3383,7 @@ pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3138,8 +3394,8 @@ pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_ge(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ge(a.as_u8x64(), b.as_u8x64())) } } /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3149,7 +3405,7 @@ pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3160,8 +3416,8 @@ pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_ge(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_u8x32(), b.as_u8x32())) } } /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3171,7 +3427,7 @@ pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3182,8 +3438,8 @@ pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_ge(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_u8x16(), b.as_u8x16())) } } /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3193,7 +3449,7 @@ pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3204,8 +3460,8 @@ pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_ge(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_i16x32(), b.as_i16x32())) } } /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3215,7 +3471,7 @@ pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3226,8 +3482,8 @@ pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_ge(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_i16x16(), b.as_i16x16())) } } /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3237,7 +3493,7 @@ pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3248,8 +3504,8 @@ pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ge(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_i16x8(), b.as_i16x8())) } } /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3259,7 +3515,7 @@ pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3270,8 +3526,8 @@ pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_ge(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ge(a.as_i8x64(), b.as_i8x64())) } } /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3281,7 +3537,7 @@ pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3292,8 +3548,8 @@ pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_ge(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ge(a.as_i8x32(), b.as_i8x32())) } } /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3303,7 +3559,7 @@ pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3314,8 +3570,8 @@ pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_ge(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_i8x16(), b.as_i8x16())) } } /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3325,7 +3581,7 @@ pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -3336,8 +3592,8 @@ pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_eq(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_u16x32(), b.as_u16x32())) } } /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3347,7 +3603,7 @@ pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3358,8 +3614,8 @@ pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_eq(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_u16x16(), b.as_u16x16())) } } /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3369,7 +3625,7 @@ pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3380,8 +3636,8 @@ pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_eq(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_u16x8(), b.as_u16x8())) } } /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
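Making these bodies safe does not change the calling contract: reaching a `#[target_feature]` function from code compiled without the feature still takes a runtime check plus an `unsafe` call. A hedged caller-side sketch (both helper names are ours; `__m128i` can be passed around freely because SSE2 is baseline on x86-64):

use std::arch::x86_64::*;

// One bit per 16-bit lane; all eight bits set means every lane compared equal.
#[target_feature(enable = "avx512bw,avx512vl")]
fn all_lanes_equal(a: __m128i, b: __m128i) -> bool {
    _mm_cmpeq_epu16_mask(a, b) == 0xff
}

fn all_lanes_equal_dispatch(a: __m128i, b: __m128i) -> Option<bool> {
    if std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("avx512vl")
    {
        // SAFETY: the required target features were detected at runtime.
        Some(unsafe { all_lanes_equal(a, b) })
    } else {
        None
    }
}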
@@ -3391,7 +3647,7 @@ pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3402,8 +3658,8 @@ pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_eq(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_eq(a.as_u8x64(), b.as_u8x64())) } } /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3413,7 +3669,7 @@ pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3424,8 +3680,8 @@ pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_eq(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_u8x32(), b.as_u8x32())) } } /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3435,7 +3691,7 @@ pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3446,8 +3702,8 @@ pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_eq(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_u8x16(), b.as_u8x16())) } } /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3457,7 +3713,7 @@ pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3468,8 +3724,8 @@ pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_eq(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_i16x32(), b.as_i16x32())) } } /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3479,7 +3735,7 @@ pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3490,8 +3746,8 @@ pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_eq(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_i16x16(), b.as_i16x16())) } } /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3501,7 +3757,7 @@ pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3512,8 +3768,8 @@ pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_eq(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_i16x8(), b.as_i16x8())) } } /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3523,7 +3779,7 @@ pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3534,8 +3790,8 @@ pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_eq(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_eq(a.as_i8x64(), b.as_i8x64())) } } /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3545,7 +3801,7 @@ pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3556,8 +3812,8 @@ pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_eq(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_eq(a.as_i8x32(), b.as_i8x32())) } } /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3567,7 +3823,7 @@ pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3578,8 +3834,8 @@ pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_eq(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_i8x16(), b.as_i8x16())) } } /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3589,7 +3845,7 @@ pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -3600,8 +3856,8 @@ pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_ne(a.as_u16x32(), b.as_u16x32())) +pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_u16x32(), b.as_u16x32())) } } /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
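A common use of the byte-equality masks above is a branch-free "find first match": since `__mmask16` is just a `u16`, `trailing_zeros` yields the lane index directly. Illustrative helper (ours, not from the crate), under the same nightly/unstable assumptions:

use std::arch::x86_64::*;

// Index of the first byte of `haystack` equal to the corresponding byte of
// `needles`, or None when no lane matches.
#[target_feature(enable = "avx512bw,avx512vl")]
fn first_equal_byte(haystack: __m128i, needles: __m128i) -> Option<usize> {
    let m = _mm_cmpeq_epi8_mask(haystack, needles);
    if m == 0 { None } else { Some(m.trailing_zeros() as usize) }
}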
@@ -3611,7 +3867,7 @@ pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3622,8 +3878,8 @@ pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_ne(a.as_u16x16(), b.as_u16x16())) +pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_u16x16(), b.as_u16x16())) } } /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3633,7 +3889,7 @@ pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3644,8 +3900,8 @@ pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ne(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_u16x8(), b.as_u16x8())) } } /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3655,7 +3911,7 @@ pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3666,8 +3922,8 @@ pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) - #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_ne(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ne(a.as_u8x64(), b.as_u8x64())) } } /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3677,7 +3933,7 @@ pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3688,8 +3944,8 @@ pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_ne(a.as_u8x32(), b.as_u8x32())) +pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_u8x32(), b.as_u8x32())) } } /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3699,7 +3955,7 @@ pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3710,8 +3966,8 @@ pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::(simd_ne(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_u8x16(), b.as_u8x16())) } } /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3721,7 +3977,7 @@ pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3732,8 +3988,8 @@ pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) - #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - simd_bitmask::(simd_ne(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_i16x32(), b.as_i16x32())) } } /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3743,7 +3999,7 @@ pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3754,8 +4010,8 @@ pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - simd_bitmask::(simd_ne(a.as_i16x16(), b.as_i16x16())) +pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_i16x16(), b.as_i16x16())) } } /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3765,7 +4021,7 @@ pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3776,8 +4032,8 @@ pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ne(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_i16x8(), b.as_i16x8())) } } /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3787,7 +4043,7 @@ pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3798,8 +4054,8 @@ pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) - #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - simd_bitmask::(simd_ne(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { simd_bitmask::(simd_ne(a.as_i8x64(), b.as_i8x64())) } } /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3809,7 +4065,7 @@ pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3820,8 +4076,8 @@ pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - simd_bitmask::(simd_ne(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { simd_bitmask::(simd_ne(a.as_i8x32(), b.as_i8x32())) } } /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3831,7 +4087,7 @@ pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3842,8 +4098,8 @@ pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) } } /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -3853,7 +4109,7 @@ pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -3865,21 +4121,23 @@ pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u16x32(); - let b = b.as_u16x32(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i16x32::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i16x32::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x32(); + let b = b.as_u16x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x32::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -3890,26 +4148,28 @@ pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_mask_cmp_epu16_mask( +pub fn _mm512_mask_cmp_epu16_mask( k1: __mmask32, a: __m512i, b: __m512i, ) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u16x32(); - let b = b.as_u16x32(); - let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x32::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x32(); + let b = b.as_u16x32(); + let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -3920,21 +4180,23 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u16x16(); - let b = b.as_u16x16(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i16x16::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i16x16::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x16(); + let b = b.as_u16x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x16::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3945,26 +4207,28 @@ pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_mask_cmp_epu16_mask( +pub fn _mm256_mask_cmp_epu16_mask( k1: __mmask16, a: __m256i, b: __m256i, ) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u16x16(); - let b = b.as_u16x16(); - let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x16::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x16(); + let b = b.as_u16x16(); + let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -3975,21 +4239,23 @@ pub unsafe fn _mm256_mask_cmp_epu16_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u16x8(); - let b = b.as_u16x8(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i16x8::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i16x8::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x8(); + let b = b.as_u16x8(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x8::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
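// The IMM8 operand selects the match arm shown above (0 = EQ, 1 = LT, 2 = LE, 3 = FALSE,
// 4 = NE, 5 = NLT, 6 = NLE, 7 = TRUE). A minimal sketch using the `_MM_CMPINT_LE` constant
// (value 2, the `simd_le` arm); `all_lanes_le` is a hypothetical helper and assumes the same
// nightly feature gate as the rest of this module.
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw,avx512vl")]
fn all_lanes_le(a: __m128i, b: __m128i) -> bool {
    // All eight u16 lanes of `a` are <= the corresponding lanes of `b`
    // exactly when every bit of the returned __mmask8 is set.
    _mm_cmp_epu16_mask::<_MM_CMPINT_LE>(a, b) == 0xff
}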
@@ -4000,26 +4266,24 @@ pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_epu16_mask( - k1: __mmask8, - a: __m128i, - b: __m128i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u16x8(); - let b = b.as_u16x8(); - let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x8::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) +pub fn _mm_mask_cmp_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u16x8(); + let b = b.as_u16x8(); + let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4030,21 +4294,23 @@ pub unsafe fn _mm_mask_cmp_epu16_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u8x64(); - let b = b.as_u8x64(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i8x64::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i8x64::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x64::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x64::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4055,26 +4321,28 @@ pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_mask_cmp_epu8_mask( +pub fn _mm512_mask_cmp_epu8_mask( k1: __mmask64, a: __m512i, b: __m512i, ) -> __mmask64 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u8x64(); - let b = b.as_u8x64(); - let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x64::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x64::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4085,21 +4353,23 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u8x32(); - let b = b.as_u8x32(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i8x32::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i8x32::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x32::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4110,26 +4380,28 @@ pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_mask_cmp_epu8_mask( +pub fn _mm256_mask_cmp_epu8_mask( k1: __mmask32, a: __m256i, b: __m256i, ) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u8x32(); - let b = b.as_u8x32(); - let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x32::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4140,21 +4412,23 @@ pub unsafe fn _mm256_mask_cmp_epu8_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u8x16(); - let b = b.as_u8x16(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i8x16::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i8x16::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x16::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4165,26 +4439,24 @@ pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mm #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_epu8_mask( - k1: __mmask16, - a: __m128i, - b: __m128i, -) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_u8x16(); - let b = b.as_u8x16(); - let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x16::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) +pub fn _mm_mask_cmp_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4195,21 +4467,23 @@ pub unsafe fn _mm_mask_cmp_epu8_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i16x32(); - let b = b.as_i16x32(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i16x32::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i16x32::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x32(); + let b = b.as_i16x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x32::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
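// In the masked variants k1 acts as a zeromask: result bits whose k1 bit is clear are forced
// to zero, which is exactly what the `simd_and(k1, ...)` arms above compute. A hypothetical
// sketch that tests only the low eight byte lanes for inequality:
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw,avx512vl")]
fn low_half_ne(a: __m128i, b: __m128i) -> __mmask16 {
    // Bits 8..16 of the result are always zero because the corresponding k1 bits are zero.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(0x00ff, a, b)
}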
@@ -4220,26 +4494,28 @@ pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_mask_cmp_epi16_mask( +pub fn _mm512_mask_cmp_epi16_mask( k1: __mmask32, a: __m512i, b: __m512i, ) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i16x32(); - let b = b.as_i16x32(); - let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x32::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x32(); + let b = b.as_i16x32(); + let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4250,21 +4526,23 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i16x16(); - let b = b.as_i16x16(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i16x16::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i16x16::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x16(); + let b = b.as_i16x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x16::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4275,26 +4553,28 @@ pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_mask_cmp_epi16_mask( +pub fn _mm256_mask_cmp_epi16_mask( k1: __mmask16, a: __m256i, b: __m256i, ) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i16x16(); - let b = b.as_i16x16(); - let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x16::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x16(); + let b = b.as_i16x16(); + let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4305,21 +4585,23 @@ pub unsafe fn _mm256_mask_cmp_epi16_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i16x8(); - let b = b.as_i16x8(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i16x8::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i16x8::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x8(); + let b = b.as_i16x8(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i16x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i16x8::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4330,26 +4612,24 @@ pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_epi16_mask( - k1: __mmask8, - a: __m128i, - b: __m128i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i16x8(); - let b = b.as_i16x8(); - let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x8::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) +pub fn _mm_mask_cmp_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i16x8(); + let b = b.as_i16x8(); + let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i16x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4360,21 +4640,23 @@ pub unsafe fn _mm_mask_cmp_epi16_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i8x64(); - let b = b.as_i8x64(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i8x64::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i8x64::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x64(); + let b = b.as_i8x64(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x64::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x64::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
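// A 64-byte block comparison built from the signed 8-bit compare above; `blocks_equal` is a
// hypothetical helper (`_MM_CMPINT_EQ` is operand 0, the `simd_eq` arm, and `__mmask64` is a
// plain u64).
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw")]
fn blocks_equal(a: __m512i, b: __m512i) -> bool {
    _mm512_cmp_epi8_mask::<_MM_CMPINT_EQ>(a, b) == u64::MAX
}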
@@ -4385,26 +4667,28 @@ pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm512_mask_cmp_epi8_mask( +pub fn _mm512_mask_cmp_epi8_mask( k1: __mmask64, a: __m512i, b: __m512i, ) -> __mmask64 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i8x64(); - let b = b.as_i8x64(); - let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x64::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x64(); + let b = b.as_i8x64(); + let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x64::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4415,21 +4699,23 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i8x32(); - let b = b.as_i8x32(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i8x32::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i8x32::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x32(); + let b = b.as_i8x32(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x32::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x32::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4440,26 +4726,28 @@ pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm256_mask_cmp_epi8_mask( +pub fn _mm256_mask_cmp_epi8_mask( k1: __mmask32, a: __m256i, b: __m256i, ) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i8x32(); - let b = b.as_i8x32(); - let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x32::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x32(); + let b = b.as_i8x32(); + let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x32::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -4470,21 +4758,23 @@ pub unsafe fn _mm256_mask_cmp_epi8_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i8x16(); - let b = b.as_i8x16(); - let r = match IMM8 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i8x16::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i8x16::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x16(); + let b = b.as_i8x16(); + let r = match IMM8 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i8x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i8x16::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4495,26 +4785,24 @@ pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mm #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_epi8_mask( - k1: __mmask16, - a: __m128i, - b: __m128i, -) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 3); - let a = a.as_i8x16(); - let b = b.as_i8x16(); - let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); - let r = match IMM8 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x16::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) +pub fn _mm_mask_cmp_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 3); + let a = a.as_i8x16(); + let b = b.as_i8x16(); + let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); + let r = match IMM8 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i8x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a. @@ -4523,8 +4811,8 @@ pub unsafe fn _mm_mask_cmp_epi8_mask( #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { - simd_reduce_add_unordered(a.as_i16x16()) +pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_add_unordered(a.as_i16x16()) } } /// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4533,8 +4821,8 @@ pub unsafe fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) +pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } } /// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a. @@ -4543,8 +4831,8 @@ pub unsafe fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_add_epi16(a: __m128i) -> i16 { - simd_reduce_add_unordered(a.as_i16x8()) +pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_add_unordered(a.as_i16x8()) } } /// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a. 
@@ -4553,8 +4841,8 @@ pub unsafe fn _mm_reduce_add_epi16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) +pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } } /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. @@ -4563,8 +4851,8 @@ pub unsafe fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { - simd_reduce_add_unordered(a.as_i8x32()) +pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_add_unordered(a.as_i8x32()) } } /// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4573,8 +4861,8 @@ pub unsafe fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) +pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } } /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. @@ -4583,8 +4871,8 @@ pub unsafe fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_add_epi8(a: __m128i) -> i8 { - simd_reduce_add_unordered(a.as_i8x16()) +pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_add_unordered(a.as_i8x16()) } } /// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4593,8 +4881,8 @@ pub unsafe fn _mm_reduce_add_epi8(a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) +pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } } /// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -4603,8 +4891,8 @@ pub unsafe fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_and_epi16(a: __m256i) -> i16 { - simd_reduce_and(a.as_i16x16()) +pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_and(a.as_i16x16()) } } /// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. 
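// The masked add reductions substitute 0 (the additive identity) for lanes whose mask bit is
// clear, so an all-zero mask returns 0. Hypothetical sketch, same nightly feature gate assumed:
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw,avx512vl")]
fn sum_selected(k: __mmask8, v: __m128i) -> i16 {
    // Adds only the i16 lanes of `v` whose bit in `k` is set.
    _mm_mask_reduce_add_epi16(k, v)
}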
@@ -4613,12 +4901,14 @@ pub unsafe fn _mm256_reduce_and_epi16(a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_and(simd_select_bitmask( - k, - a.as_i16x16(), - _mm256_set1_epi64x(-1).as_i16x16(), - )) +pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i16x16(), + _mm256_set1_epi64x(-1).as_i16x16(), + )) + } } /// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -4627,8 +4917,8 @@ pub unsafe fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_and_epi16(a: __m128i) -> i16 { - simd_reduce_and(a.as_i16x8()) +pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_and(a.as_i16x8()) } } /// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. @@ -4637,12 +4927,14 @@ pub unsafe fn _mm_reduce_and_epi16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_and(simd_select_bitmask( - k, - a.as_i16x8(), - _mm_set1_epi64x(-1).as_i16x8(), - )) +pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i16x8(), + _mm_set1_epi64x(-1).as_i16x8(), + )) + } } /// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -4651,8 +4943,8 @@ pub unsafe fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_and_epi8(a: __m256i) -> i8 { - simd_reduce_and(a.as_i8x32()) +pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_and(a.as_i8x32()) } } /// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. @@ -4661,12 +4953,14 @@ pub unsafe fn _mm256_reduce_and_epi8(a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_and(simd_select_bitmask( - k, - a.as_i8x32(), - _mm256_set1_epi64x(-1).as_i8x32(), - )) +pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i8x32(), + _mm256_set1_epi64x(-1).as_i8x32(), + )) + } } /// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -4675,8 +4969,8 @@ pub unsafe fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_and_epi8(a: __m128i) -> i8 { - simd_reduce_and(a.as_i8x16()) +pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_and(a.as_i8x16()) } } /// Reduce the packed 8-bit integers in a by bitwise AND using mask k. 
Returns the bitwise AND of all active elements in a. @@ -4685,12 +4979,14 @@ pub unsafe fn _mm_reduce_and_epi8(a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_and(simd_select_bitmask( - k, - a.as_i8x16(), - _mm_set1_epi64x(-1).as_i8x16(), - )) +pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { + simd_reduce_and(simd_select_bitmask( + k, + a.as_i8x16(), + _mm_set1_epi64x(-1).as_i8x16(), + )) + } } /// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4699,8 +4995,8 @@ pub unsafe fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_max_epi16(a: __m256i) -> i16 { - simd_reduce_max(a.as_i16x16()) +pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_max(a.as_i16x16()) } } /// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -4709,8 +5005,8 @@ pub unsafe fn _mm256_reduce_max_epi16(a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) +pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) } } /// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4719,8 +5015,8 @@ pub unsafe fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_max_epi16(a: __m128i) -> i16 { - simd_reduce_max(a.as_i16x8()) +pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_max(a.as_i16x8()) } } /// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -4729,8 +5025,8 @@ pub unsafe fn _mm_reduce_max_epi16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) +pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) } } /// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4739,8 +5035,8 @@ pub unsafe fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_max_epi8(a: __m256i) -> i8 { - simd_reduce_max(a.as_i8x32()) +pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_max(a.as_i8x32()) } } /// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. 
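// The masked AND reductions pad inactive lanes with all-ones (the AND identity, via the
// `set1_epi64x(-1)` selects above), and the masked signed-max reductions pad with the type's
// minimum, so an empty mask yields the identity element of the operation. Hypothetical sketch:
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw,avx512vl")]
fn reduction_identities(v: __m128i) -> (i8, i16) {
    // With no lanes selected these return -1 (all bits set) and i16::MIN respectively.
    (_mm_mask_reduce_and_epi8(0, v), _mm_mask_reduce_max_epi16(0, v))
}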
@@ -4749,8 +5045,8 @@ pub unsafe fn _mm256_reduce_max_epi8(a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) +pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) } } /// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4759,8 +5055,8 @@ pub unsafe fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_max_epi8(a: __m128i) -> i8 { - simd_reduce_max(a.as_i8x16()) +pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_max(a.as_i8x16()) } } /// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -4769,8 +5065,8 @@ pub unsafe fn _mm_reduce_max_epi8(a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) +pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) } } /// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4779,8 +5075,8 @@ pub unsafe fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_max_epu16(a: __m256i) -> u16 { - simd_reduce_max(a.as_u16x16()) +pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 { + unsafe { simd_reduce_max(a.as_u16x16()) } } /// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -4789,8 +5085,8 @@ pub unsafe fn _mm256_reduce_max_epu16(a: __m256i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 { - simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) +pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) } } /// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4799,8 +5095,8 @@ pub unsafe fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_max_epu16(a: __m128i) -> u16 { - simd_reduce_max(a.as_u16x8()) +pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 { + unsafe { simd_reduce_max(a.as_u16x8()) } } /// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. 
@@ -4809,8 +5105,8 @@ pub unsafe fn _mm_reduce_max_epu16(a: __m128i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 { - simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) +pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) } } /// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4819,8 +5115,8 @@ pub unsafe fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_max_epu8(a: __m256i) -> u8 { - simd_reduce_max(a.as_u8x32()) +pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 { + unsafe { simd_reduce_max(a.as_u8x32()) } } /// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -4829,8 +5125,8 @@ pub unsafe fn _mm256_reduce_max_epu8(a: __m256i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 { - simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) +pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) } } /// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4839,8 +5135,8 @@ pub unsafe fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_max_epu8(a: __m128i) -> u8 { - simd_reduce_max(a.as_u8x16()) +pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 { + unsafe { simd_reduce_max(a.as_u8x16()) } } /// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -4849,8 +5145,8 @@ pub unsafe fn _mm_reduce_max_epu8(a: __m128i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 { - simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) +pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) } } /// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4859,8 +5155,8 @@ pub unsafe fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_min_epi16(a: __m256i) -> i16 { - simd_reduce_min(a.as_i16x16()) +pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_min(a.as_i16x16()) } } /// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. 
@@ -4869,8 +5165,8 @@ pub unsafe fn _mm256_reduce_min_epi16(a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) +pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) } } /// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4879,8 +5175,8 @@ pub unsafe fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_min_epi16(a: __m128i) -> i16 { - simd_reduce_min(a.as_i16x8()) +pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_min(a.as_i16x8()) } } /// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. @@ -4889,8 +5185,8 @@ pub unsafe fn _mm_reduce_min_epi16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) +pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) } } /// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4899,8 +5195,8 @@ pub unsafe fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_min_epi8(a: __m256i) -> i8 { - simd_reduce_min(a.as_i8x32()) +pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_min(a.as_i8x32()) } } /// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. @@ -4909,8 +5205,8 @@ pub unsafe fn _mm256_reduce_min_epi8(a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) +pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) } } /// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4919,8 +5215,8 @@ pub unsafe fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_min_epi8(a: __m128i) -> i8 { - simd_reduce_min(a.as_i8x16()) +pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_min(a.as_i8x16()) } } /// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. 
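// The masked signed-min reductions pad inactive lanes with the type's maximum (the
// `splat(0x7fff)` / `splat(0x7f)` above), so those lanes can never win the minimum.
// Hypothetical sketch:
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512bw,avx512vl")]
fn min_of_low_four(v: __m256i) -> i16 {
    // Only lanes 0..4 participate; an all-zero mask would return i16::MAX.
    _mm256_mask_reduce_min_epi16(0b1111, v)
}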
@@ -4929,8 +5225,8 @@ pub unsafe fn _mm_reduce_min_epi8(a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) +pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) } } /// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4939,8 +5235,8 @@ pub unsafe fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_min_epu16(a: __m256i) -> u16 { - simd_reduce_min(a.as_u16x16()) +pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 { + unsafe { simd_reduce_min(a.as_u16x16()) } } /// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. @@ -4949,8 +5245,8 @@ pub unsafe fn _mm256_reduce_min_epu16(a: __m256i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 { - simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) +pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) } } /// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4959,8 +5255,8 @@ pub unsafe fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_min_epu16(a: __m128i) -> u16 { - simd_reduce_min(a.as_u16x8()) +pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 { + unsafe { simd_reduce_min(a.as_u16x8()) } } /// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. @@ -4969,8 +5265,8 @@ pub unsafe fn _mm_reduce_min_epu16(a: __m128i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 { - simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) +pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) } } /// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4979,8 +5275,8 @@ pub unsafe fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_min_epu8(a: __m256i) -> u8 { - simd_reduce_min(a.as_u8x32()) +pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 { + unsafe { simd_reduce_min(a.as_u8x32()) } } /// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. 
@@ -4989,8 +5285,8 @@ pub unsafe fn _mm256_reduce_min_epu8(a: __m256i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 { - simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) +pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) } } /// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -4999,8 +5295,8 @@ pub unsafe fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_min_epu8(a: __m128i) -> u8 { - simd_reduce_min(a.as_u8x16()) +pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 { + unsafe { simd_reduce_min(a.as_u8x16()) } } /// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a. @@ -5009,8 +5305,8 @@ pub unsafe fn _mm_reduce_min_epu8(a: __m128i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 { - simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) +pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) } } /// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5019,8 +5315,8 @@ pub unsafe fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 { - simd_reduce_mul_unordered(a.as_i16x16()) +pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_mul_unordered(a.as_i16x16()) } } /// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5029,8 +5325,8 @@ pub unsafe fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) +pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) } } /// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5039,8 +5335,8 @@ pub unsafe fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_mul_epi16(a: __m128i) -> i16 { - simd_reduce_mul_unordered(a.as_i16x8()) +pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_mul_unordered(a.as_i16x8()) } } /// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. 
@@ -5049,8 +5345,8 @@ pub unsafe fn _mm_reduce_mul_epi16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) +pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) } } /// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5059,8 +5355,8 @@ pub unsafe fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 { - simd_reduce_mul_unordered(a.as_i8x32()) +pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_mul_unordered(a.as_i8x32()) } } /// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5069,8 +5365,8 @@ pub unsafe fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) +pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) } } /// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5079,8 +5375,8 @@ pub unsafe fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_mul_epi8(a: __m128i) -> i8 { - simd_reduce_mul_unordered(a.as_i8x16()) +pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_mul_unordered(a.as_i8x16()) } } /// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5089,8 +5385,8 @@ pub unsafe fn _mm_reduce_mul_epi8(a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) +pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) } } /// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5099,8 +5395,8 @@ pub unsafe fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_or_epi16(a: __m256i) -> i16 { - simd_reduce_or(a.as_i16x16()) +pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 { + unsafe { simd_reduce_or(a.as_i16x16()) } } /// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. 
@@ -5109,8 +5405,8 @@ pub unsafe fn _mm256_reduce_or_epi16(a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) +pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } } /// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5119,8 +5415,8 @@ pub unsafe fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_or_epi16(a: __m128i) -> i16 { - simd_reduce_or(a.as_i16x8()) +pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 { + unsafe { simd_reduce_or(a.as_i16x8()) } } /// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. @@ -5129,8 +5425,8 @@ pub unsafe fn _mm_reduce_or_epi16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) +pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } } /// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5139,8 +5435,8 @@ pub unsafe fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_or_epi8(a: __m256i) -> i8 { - simd_reduce_or(a.as_i8x32()) +pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 { + unsafe { simd_reduce_or(a.as_i8x32()) } } /// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. @@ -5149,8 +5445,8 @@ pub unsafe fn _mm256_reduce_or_epi8(a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) +pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } } /// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5159,8 +5455,8 @@ pub unsafe fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_or_epi8(a: __m128i) -> i8 { - simd_reduce_or(a.as_i8x16()) +pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 { + unsafe { simd_reduce_or(a.as_i8x16()) } } /// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. 
@@ -5169,8 +5465,8 @@ pub unsafe fn _mm_reduce_or_epi8(a: __m128i) -> i8 { #[inline] #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) +pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } } /// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. @@ -5540,8 +5836,8 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5551,14 +5847,11 @@ pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm512_mask_madd_epi16( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let madd = _mm512_madd_epi16(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, madd, src.as_i32x16())) +pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_madd_epi16(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, madd, src.as_i32x16())) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5568,9 +5861,11 @@ pub unsafe fn _mm512_mask_madd_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let madd = _mm512_madd_epi16(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, madd, i32x16::ZERO)) +pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_madd_epi16(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, madd, i32x16::ZERO)) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
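// Illustrative aside (not part of the patch): a scalar model of one 32-bit lane of
// the vpmaddwd-based intrinsics above, which multiply adjacent pairs of signed
// 16-bit elements and add the two products. Sketch of the intended semantics only.
fn madd_pair(a0: i16, a1: i16, b0: i16, b1: i16) -> i32 {
    // Each product fits in i32; the sum wraps only in the corner case where both
    // pairs are (-32768, -32768).
    (i32::from(a0) * i32::from(b0)).wrapping_add(i32::from(a1) * i32::from(b1))
}
// madd_pair(1, 2, 10, 20) == 50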
@@ -5580,9 +5875,11 @@ pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let madd = _mm256_madd_epi16(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, madd, src.as_i32x8())) +pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_madd_epi16(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, madd, src.as_i32x8())) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5592,9 +5889,11 @@ pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let madd = _mm256_madd_epi16(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, madd, i32x8::ZERO)) +pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_madd_epi16(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, madd, i32x8::ZERO)) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5604,9 +5903,11 @@ pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let madd = _mm_madd_epi16(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, madd, src.as_i32x4())) +pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_madd_epi16(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, madd, src.as_i32x4())) + } } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
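// Illustrative aside (not part of the patch): the `mask_` forms above merge lanes
// from `src` where the mask bit is clear, while the `maskz_` forms zero them.
// Hypothetical sketch under the same toolchain assumptions as the earlier aside.
use core::arch::x86_64::{__m256i, __mmask8, _mm256_mask_madd_epi16, _mm256_maskz_madd_epi16};

#[target_feature(enable = "avx512bw,avx512vl")]
fn madd_variants(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> (__m256i, __m256i) {
    let merged = _mm256_mask_madd_epi16(src, k, a, b); // inactive lanes copied from `src`
    let zeroed = _mm256_maskz_madd_epi16(k, a, b); // inactive lanes set to zero
    (merged, zeroed)
}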
@@ -5616,9 +5917,11 @@ pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddwd))] -pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let madd = _mm_madd_epi16(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, madd, i32x4::ZERO)) +pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_madd_epi16(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, madd, i32x4::ZERO)) + } } /// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst. @@ -5628,8 +5931,8 @@ pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) } } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5639,14 +5942,11 @@ pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm512_mask_maddubs_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, madd, src.as_i16x32())) +pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, madd, src.as_i16x32())) + } } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5656,9 +5956,11 @@ pub unsafe fn _mm512_mask_maddubs_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, madd, i16x32::ZERO)) +pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, madd, i16x32::ZERO)) + } } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. 
Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5668,14 +5970,11 @@ pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm256_mask_maddubs_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, madd, src.as_i16x16())) +pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, madd, src.as_i16x16())) + } } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5685,9 +5984,11 @@ pub unsafe fn _mm256_mask_maddubs_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, madd, i16x16::ZERO)) +pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, madd, i16x16::ZERO)) + } } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5697,9 +5998,11 @@ pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let madd = _mm_maddubs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, madd, src.as_i16x8())) +pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_maddubs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, madd, src.as_i16x8())) + } } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -5709,9 +6012,11 @@ pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaddubsw))] -pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let madd = _mm_maddubs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, madd, i16x8::ZERO)) +pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let madd = _mm_maddubs_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, madd, i16x8::ZERO)) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst. @@ -5721,8 +6026,8 @@ pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5732,14 +6037,11 @@ pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm512_mask_packs_epi32( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let pack = _mm512_packs_epi32(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, pack, src.as_i16x32())) +pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, src.as_i16x32())) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5749,9 +6051,11 @@ pub unsafe fn _mm512_mask_packs_epi32( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let pack = _mm512_packs_epi32(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) +pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -5761,14 +6065,11 @@ pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm256_mask_packs_epi32( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let pack = _mm256_packs_epi32(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, pack, src.as_i16x16())) +pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packs_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, src.as_i16x16())) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5778,9 +6079,11 @@ pub unsafe fn _mm256_mask_packs_epi32( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let pack = _mm256_packs_epi32(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) +pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packs_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5790,9 +6093,11 @@ pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packs_epi32(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, pack, src.as_i16x8())) +pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, src.as_i16x8())) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5802,9 +6107,11 @@ pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packs_epi32(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) +pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst. 
@@ -5814,8 +6121,8 @@ pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5825,14 +6132,11 @@ pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm512_mask_packs_epi16( - src: __m512i, - k: __mmask64, - a: __m512i, - b: __m512i, -) -> __m512i { - let pack = _mm512_packs_epi16(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, pack, src.as_i8x64())) +pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, src.as_i8x64())) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5842,9 +6146,11 @@ pub unsafe fn _mm512_mask_packs_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let pack = _mm512_packs_epi16(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) +pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packs_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5854,14 +6160,11 @@ pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm256_mask_packs_epi16( - src: __m256i, - k: __mmask32, - a: __m256i, - b: __m256i, -) -> __m256i { - let pack = _mm256_packs_epi16(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, pack, src.as_i8x32())) +pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packs_epi16(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, pack, src.as_i8x32())) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -5871,9 +6174,11 @@ pub unsafe fn _mm256_mask_packs_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let pack = _mm256_packs_epi16(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) +pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packs_epi16(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5883,9 +6188,11 @@ pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packs_epi16(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, pack, src.as_i8x16())) +pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, src.as_i8x16())) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5895,9 +6202,11 @@ pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packs_epi16(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) +pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packs_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst. @@ -5907,8 +6216,8 @@ pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
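// Illustrative aside (not part of the patch): scalar models of the per-element
// saturation used by the pack intrinsics above. `packs` narrows with signed
// saturation, `packus` with unsigned saturation. Sketch of the semantics only.
fn saturate_i32_to_i16(x: i32) -> i16 {
    x.clamp(i32::from(i16::MIN), i32::from(i16::MAX)) as i16
}
fn saturate_i32_to_u16(x: i32) -> u16 {
    x.clamp(0, i32::from(u16::MAX)) as u16
}
// saturate_i32_to_i16(40_000) == 32_767, saturate_i32_to_u16(-5) == 0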
@@ -5918,14 +6227,11 @@ pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm512_mask_packus_epi32( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let pack = _mm512_packus_epi32(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, pack, src.as_i16x32())) +pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, src.as_i16x32())) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5935,9 +6241,11 @@ pub unsafe fn _mm512_mask_packus_epi32( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let pack = _mm512_packus_epi32(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) +pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi32(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5947,14 +6255,11 @@ pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm256_mask_packus_epi32( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let pack = _mm256_packus_epi32(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, pack, src.as_i16x16())) +pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, src.as_i16x16())) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -5964,9 +6269,11 @@ pub unsafe fn _mm256_mask_packus_epi32( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let pack = _mm256_packus_epi32(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) +pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi32(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5976,9 +6283,11 @@ pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packus_epi32(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, pack, src.as_i16x8())) +pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, src.as_i16x8())) + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5988,9 +6297,11 @@ pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packus_epi32(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) +pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi32(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst. @@ -6000,8 +6311,8 @@ pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { - transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) +pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -6011,14 +6322,11 @@ pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm512_mask_packus_epi16( - src: __m512i, - k: __mmask64, - a: __m512i, - b: __m512i, -) -> __m512i { - let pack = _mm512_packus_epi16(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, pack, src.as_i8x64())) +pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, src.as_i8x64())) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6028,9 +6336,11 @@ pub unsafe fn _mm512_mask_packus_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let pack = _mm512_packus_epi16(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) +pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let pack = _mm512_packus_epi16(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6040,14 +6350,11 @@ pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm256_mask_packus_epi16( - src: __m256i, - k: __mmask32, - a: __m256i, - b: __m256i, -) -> __m256i { - let pack = _mm256_packus_epi16(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, pack, src.as_i8x32())) +pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi16(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, pack, src.as_i8x32())) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6057,9 +6364,11 @@ pub unsafe fn _mm256_mask_packus_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let pack = _mm256_packus_epi16(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) +pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let pack = _mm256_packus_epi16(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -6069,9 +6378,11 @@ pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packus_epi16(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, pack, src.as_i8x16())) +pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, src.as_i8x16())) + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6081,9 +6392,11 @@ pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let pack = _mm_packus_epi16(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) +pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let pack = _mm_packus_epi16(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst. @@ -6093,11 +6406,13 @@ pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i { - let a = simd_cast::<_, u32x32>(a.as_u16x32()); - let b = simd_cast::<_, u32x32>(b.as_u16x32()); - let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1)); - transmute(simd_cast::<_, u16x32>(r)) +pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, u32x32>(a.as_u16x32()); + let b = simd_cast::<_, u32x32>(b.as_u16x32()); + let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1)); + transmute(simd_cast::<_, u16x32>(r)) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6107,9 +6422,11 @@ pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let avg = _mm512_avg_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, avg, src.as_u16x32())) +pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, avg, src.as_u16x32())) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -6119,9 +6436,11 @@ pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let avg = _mm512_avg_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, avg, u16x32::ZERO)) +pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu16(a, b).as_u16x32(); + transmute(simd_select_bitmask(k, avg, u16x32::ZERO)) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6131,9 +6450,11 @@ pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let avg = _mm256_avg_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, avg, src.as_u16x16())) +pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, avg, src.as_u16x16())) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6143,9 +6464,11 @@ pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let avg = _mm256_avg_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, avg, u16x16::ZERO)) +pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu16(a, b).as_u16x16(); + transmute(simd_select_bitmask(k, avg, u16x16::ZERO)) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6155,9 +6478,11 @@ pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let avg = _mm_avg_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, avg, src.as_u16x8())) +pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, avg, src.as_u16x8())) + } } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -6167,9 +6492,11 @@ pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgw))] -pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let avg = _mm_avg_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, avg, u16x8::ZERO)) +pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu16(a, b).as_u16x8(); + transmute(simd_select_bitmask(k, avg, u16x8::ZERO)) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst. @@ -6179,11 +6506,13 @@ pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i { - let a = simd_cast::<_, u16x64>(a.as_u8x64()); - let b = simd_cast::<_, u16x64>(b.as_u8x64()); - let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1)); - transmute(simd_cast::<_, u8x64>(r)) +pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, u16x64>(a.as_u8x64()); + let b = simd_cast::<_, u16x64>(b.as_u8x64()); + let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1)); + transmute(simd_cast::<_, u8x64>(r)) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6193,9 +6522,11 @@ pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let avg = _mm512_avg_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, avg, src.as_u8x64())) +pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, avg, src.as_u8x64())) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6205,9 +6536,11 @@ pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let avg = _mm512_avg_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, avg, u8x64::ZERO)) +pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let avg = _mm512_avg_epu8(a, b).as_u8x64(); + transmute(simd_select_bitmask(k, avg, u8x64::ZERO)) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
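// Illustrative aside (not part of the patch): the widened arithmetic above computes
// a rounding average, (a + b + 1) >> 1, in a wider lane type so the addition cannot
// overflow. A scalar model of one u8 lane:
fn avg_round_u8(a: u8, b: u8) -> u8 {
    ((u16::from(a) + u16::from(b) + 1) >> 1) as u8
}
// avg_round_u8(255, 254) == 255, avg_round_u8(0, 1) == 1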
@@ -6217,9 +6550,11 @@ pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let avg = _mm256_avg_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, avg, src.as_u8x32())) +pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, avg, src.as_u8x32())) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6229,9 +6564,11 @@ pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let avg = _mm256_avg_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, avg, u8x32::ZERO)) +pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let avg = _mm256_avg_epu8(a, b).as_u8x32(); + transmute(simd_select_bitmask(k, avg, u8x32::ZERO)) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6241,9 +6578,11 @@ pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let avg = _mm_avg_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, avg, src.as_u8x16())) +pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, avg, src.as_u8x16())) + } } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6253,9 +6592,11 @@ pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpavgb))] -pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let avg = _mm_avg_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, avg, u8x16::ZERO)) +pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let avg = _mm_avg_epu8(a, b).as_u8x16(); + transmute(simd_select_bitmask(k, avg, u8x16::ZERO)) + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst. 
@@ -6265,8 +6606,8 @@ pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) +pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6276,14 +6617,11 @@ pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm512_mask_sll_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_sll_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6293,9 +6631,11 @@ pub unsafe fn _mm512_mask_sll_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_sll_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6305,14 +6645,11 @@ pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm256_mask_sll_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_sll_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) +pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -6322,9 +6659,11 @@ pub unsafe fn _mm256_mask_sll_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_sll_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6334,9 +6673,11 @@ pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sll_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) +pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6346,9 +6687,11 @@ pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw))] -pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sll_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
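For the sll family converted above, the shift amount travels in the low 64 bits of an `__m128i` count operand and applies to every lane. A small sketch under the same nightly/`stdarch_x86_avx512` assumptions as the earlier example:

use core::arch::x86_64::*;

// Illustrative sketch only; helper name invented.
#[target_feature(enable = "avx512bw")]
fn shift_words_left(a: __m512i, src: __m512i, k: __mmask32) -> (__m512i, __m512i) {
    let count = _mm_cvtsi32_si128(3); // shift every 16-bit lane left by 3
    let all = _mm512_sll_epi16(a, count);
    let merged = _mm512_mask_sll_epi16(src, k, a, count); // unselected lanes copied from `src`
    (all, merged)
}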
@@ -6359,12 +6702,14 @@ pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_slli_epi16(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm512_setzero_si512() - } else { - transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))) +pub fn _mm512_slli_epi16(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 16 { + _mm512_setzero_si512() + } else { + transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))) + } } } @@ -6376,18 +6721,16 @@ pub unsafe fn _mm512_slli_epi16(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_slli_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 16 { - u16x32::ZERO - } else { - simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u16x32())) +pub fn _mm512_mask_slli_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 16 { + u16x32::ZERO + } else { + simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u16x32())) + } } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6398,13 +6741,15 @@ pub unsafe fn _mm512_mask_slli_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm512_setzero_si512() - } else { - let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)); - transmute(simd_select_bitmask(k, shf, u16x32::ZERO)) +pub fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 16 { + _mm512_setzero_si512() + } else { + let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)); + transmute(simd_select_bitmask(k, shf, u16x32::ZERO)) + } } } @@ -6416,18 +6761,16 @@ pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_slli_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 16 { - u16x16::ZERO - } else { - simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u16x16())) +pub fn _mm256_mask_slli_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 16 { + u16x16::ZERO + } else { + simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u16x16())) + } } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask 
bit is not set). @@ -6438,13 +6781,15 @@ pub unsafe fn _mm256_mask_slli_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm256_setzero_si256() - } else { - let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)); - transmute(simd_select_bitmask(k, shf, u16x16::ZERO)) +pub fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 16 { + _mm256_setzero_si256() + } else { + let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)); + transmute(simd_select_bitmask(k, shf, u16x16::ZERO)) + } } } @@ -6456,18 +6801,16 @@ pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_slli_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 16 { - u16x8::ZERO - } else { - simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u16x8())) +pub fn _mm_mask_slli_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 16 { + u16x8::ZERO + } else { + simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u16x8())) + } } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6478,13 +6821,15 @@ pub unsafe fn _mm_mask_slli_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm_setzero_si128() - } else { - let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)); - transmute(simd_select_bitmask(k, shf, u16x8::ZERO)) +pub fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 16 { + _mm_setzero_si128() + } else { + let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)); + transmute(simd_select_bitmask(k, shf, u16x8::ZERO)) + } } } @@ -6495,8 +6840,8 @@ pub unsafe fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) +pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
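The slli conversions above keep the immediate as a const generic, so Rust callers pass it with a turbofish; per the code above, an immediate of 16 or more yields an all-zero result rather than a shift masked to 4 bits. A sketch under the same assumptions as the earlier examples:

use core::arch::x86_64::*;

// Illustrative sketch only; helper name invented.
#[target_feature(enable = "avx512bw")]
fn shift_words_left_by_imm(a: __m512i, src: __m512i, k: __mmask32) -> (__m512i, __m512i, __m512i) {
    let by5 = _mm512_slli_epi16::<5>(a);
    let cleared = _mm512_slli_epi16::<16>(a); // IMM8 >= 16: all lanes zero
    let merged = _mm512_mask_slli_epi16::<5>(src, k, a); // unselected lanes copied from `src`
    (by5, cleared, merged)
}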
@@ -6506,14 +6851,11 @@ pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm512_mask_sllv_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_sllv_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6523,9 +6865,11 @@ pub unsafe fn _mm512_mask_sllv_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_sllv_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6535,8 +6879,8 @@ pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { - transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) +pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6546,14 +6890,11 @@ pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm256_mask_sllv_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_sllv_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) +pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -6563,9 +6904,11 @@ pub unsafe fn _mm256_mask_sllv_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_sllv_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6575,8 +6918,8 @@ pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { - transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) +pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6586,14 +6929,11 @@ pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm_mask_sllv_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_sllv_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) +pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6603,9 +6943,11 @@ pub unsafe fn _mm_mask_sllv_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvw))] -pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sllv_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst. 
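The sllv forms converted above take one shift amount per 16-bit lane instead of a single count register. A sketch under the same assumptions:

use core::arch::x86_64::*;

// Illustrative sketch only; helper name invented.
#[target_feature(enable = "avx512bw,avx512vl")]
fn per_lane_shift() -> __m128i {
    let a = _mm_set1_epi16(1);
    let counts = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    // Lane i becomes 1 << i, i.e. [1, 2, 4, 8, 16, 32, 64, 128].
    _mm_sllv_epi16(a, counts)
}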
@@ -6615,8 +6957,8 @@ pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) +pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6626,14 +6968,11 @@ pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm512_mask_srl_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_srl_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6643,9 +6982,11 @@ pub unsafe fn _mm512_mask_srl_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_srl_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6655,14 +6996,11 @@ pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm256_mask_srl_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_srl_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) +pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -6672,9 +7010,11 @@ pub unsafe fn _mm256_mask_srl_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_srl_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6684,9 +7024,11 @@ pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srl_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) +pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6696,9 +7038,11 @@ pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srl_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. 
@@ -6709,12 +7053,14 @@ pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_srli_epi16(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm512_setzero_si512() - } else { - transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))) +pub fn _mm512_srli_epi16(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 16 { + _mm512_setzero_si512() + } else { + transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))) + } } } @@ -6726,18 +7072,16 @@ pub unsafe fn _mm512_srli_epi16(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_srli_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 16 { - u16x32::ZERO - } else { - simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u16x32())) +pub fn _mm512_mask_srli_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 16 { + u16x32::ZERO + } else { + simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u16x32())) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6748,14 +7092,16 @@ pub unsafe fn _mm512_mask_srli_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - //imm8 should be u32, it seems the document to verify is incorrect - if IMM8 >= 16 { - _mm512_setzero_si512() - } else { - let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)); - transmute(simd_select_bitmask(k, shf, u16x32::ZERO)) +pub fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + //imm8 should be u32, it seems the document to verify is incorrect + if IMM8 >= 16 { + _mm512_setzero_si512() + } else { + let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)); + transmute(simd_select_bitmask(k, shf, u16x32::ZERO)) + } } } @@ -6767,14 +7113,12 @@ pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_srli_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_srli_epi16::(a); - transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16())) +pub fn _mm256_mask_srli_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_srli_epi16::(a); + transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16())) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the 
corresponding mask bit is not set). @@ -6785,10 +7129,12 @@ pub unsafe fn _mm256_mask_srli_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_srli_epi16::(a); - transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO)) +pub fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_srli_epi16::(a); + transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6799,14 +7145,12 @@ pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_srli_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_srli_epi16::(a); - transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8())) +pub fn _mm_mask_srli_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_srli_epi16::(a); + transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8())) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6817,10 +7161,12 @@ pub unsafe fn _mm_mask_srli_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_srli_epi16::(a); - transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO)) +pub fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_srli_epi16::(a); + transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6830,8 +7176,8 @@ pub unsafe fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) +pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
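The srli conversions mirror slli but shift right, again with writemask and zeromask flavours. A sketch under the same assumptions:

use core::arch::x86_64::*;

// Illustrative sketch only; helper name invented.
#[target_feature(enable = "avx512bw,avx512vl")]
fn shift_words_right_by_imm(src: __m256i, k: __mmask16, a: __m256i) -> (__m256i, __m256i) {
    let merged = _mm256_mask_srli_epi16::<4>(src, k, a); // unselected lanes copied from `src`
    let zeroed = _mm256_maskz_srli_epi16::<4>(k, a); // unselected lanes zeroed
    (merged, zeroed)
}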
@@ -6841,14 +7187,11 @@ pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm512_mask_srlv_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srlv_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6858,9 +7201,11 @@ pub unsafe fn _mm512_mask_srlv_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srlv_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6870,8 +7215,8 @@ pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { - transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) +pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6881,14 +7226,11 @@ pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm256_mask_srlv_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_srlv_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) +pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -6898,9 +7240,11 @@ pub unsafe fn _mm256_mask_srlv_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_srlv_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6910,8 +7254,8 @@ pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { - transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) +pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6921,14 +7265,11 @@ pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm_mask_srlv_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_srlv_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) +pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6938,9 +7279,11 @@ pub unsafe fn _mm_mask_srlv_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvw))] -pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srlv_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst. 
@@ -6950,8 +7293,8 @@ pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) +pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6961,14 +7304,11 @@ pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm512_mask_sra_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_sra_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -6978,9 +7318,11 @@ pub unsafe fn _mm512_mask_sra_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_sra_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6990,14 +7332,11 @@ pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm256_mask_sra_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_sra_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) +pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7007,9 +7346,11 @@ pub unsafe fn _mm256_mask_sra_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_sra_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7019,9 +7360,11 @@ pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sra_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) +pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7031,9 +7374,11 @@ pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw))] -pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sra_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -7044,9 +7389,11 @@ pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_srai_epi16(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16))) +pub fn _mm512_srai_epi16(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16))) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
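The sra family converted above shifts in copies of the sign bit rather than zeros. A sketch under the same assumptions:

use core::arch::x86_64::*;

// Illustrative sketch only; helper name invented.
#[target_feature(enable = "avx512bw,avx512vl")]
fn arithmetic_shift_right(k: __mmask8) -> __m128i {
    let a = _mm_set1_epi16(-32);
    let count = _mm_cvtsi32_si128(2);
    // Selected lanes become -8; lanes with a clear mask bit are zeroed.
    _mm_maskz_sra_epi16(k, a, count)
}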
@@ -7057,14 +7404,12 @@ pub unsafe fn _mm512_srai_epi16(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_srai_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_srai_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7075,10 +7420,12 @@ pub unsafe fn _mm512_mask_srai_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7089,14 +7436,12 @@ pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_srai_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)); - transmute(simd_select_bitmask(k, r, src.as_i16x16())) +pub fn _mm256_mask_srai_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)); + transmute(simd_select_bitmask(k, r, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7107,10 +7452,12 @@ pub unsafe fn _mm256_mask_srai_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)); - transmute(simd_select_bitmask(k, r, i16x16::ZERO)) +pub fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)); + transmute(simd_select_bitmask(k, r, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7121,14 +7468,12 @@ pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_srai_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)); - transmute(simd_select_bitmask(k, r, src.as_i16x8())) +pub fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)); + transmute(simd_select_bitmask(k, r, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7139,10 +7484,12 @@ pub unsafe fn _mm_mask_srai_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)); - transmute(simd_select_bitmask(k, r, i16x8::ZERO)) +pub fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)); + transmute(simd_select_bitmask(k, r, i16x8::ZERO)) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -7152,8 +7499,8 @@ pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) +pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
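For the srai conversions, the implementation clamps the immediate with `IMM8.min(15)`, so, unlike srli, an immediate of 16 or more behaves as a shift by 15: negative words become -1 and non-negative words become 0 instead of everything going to zero. A sketch under the same assumptions:

use core::arch::x86_64::*;

// Illustrative sketch only; helper name invented.
#[target_feature(enable = "avx512bw,avx512vl")]
fn sign_fill(a: __m128i, k: __mmask8) -> __m128i {
    // Clamped to a shift by 15; unselected lanes are zeroed.
    _mm_maskz_srai_epi16::<16>(k, a)
}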
@@ -7163,14 +7510,11 @@ pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm512_mask_srav_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srav_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) +pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7180,9 +7524,11 @@ pub unsafe fn _mm512_mask_srav_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srav_epi16(a, count).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi16(a, count).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -7192,8 +7538,8 @@ pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { - transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) +pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7203,14 +7549,11 @@ pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm256_mask_srav_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_srav_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) +pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7220,9 +7563,11 @@ pub unsafe fn _mm256_mask_srav_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_srav_epi16(a, count).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi16(a, count).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -7232,8 +7577,8 @@ pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { - transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) +pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7243,14 +7588,11 @@ pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm_mask_srav_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_srav_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) +pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7260,9 +7602,11 @@ pub unsafe fn _mm_mask_srav_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravw))] -pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srav_epi16(a, count).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi16(a, count).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
@@ -7272,8 +7616,8 @@ pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w -pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { - transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) +pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -7283,14 +7627,16 @@ pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2w))] -pub unsafe fn _mm512_mask_permutex2var_epi16( +pub fn _mm512_mask_permutex2var_epi16( a: __m512i, k: __mmask32, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); - transmute(simd_select_bitmask(k, permute, a.as_i16x32())) + unsafe { + let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); + transmute(simd_select_bitmask(k, permute, a.as_i16x32())) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7300,14 +7646,16 @@ pub unsafe fn _mm512_mask_permutex2var_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w -pub unsafe fn _mm512_maskz_permutex2var_epi16( +pub fn _mm512_maskz_permutex2var_epi16( k: __mmask32, a: __m512i, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); - transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) + unsafe { + let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); + transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -7317,14 +7665,16 @@ pub unsafe fn _mm512_maskz_permutex2var_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2w))] -pub unsafe fn _mm512_mask2_permutex2var_epi16( +pub fn _mm512_mask2_permutex2var_epi16( a: __m512i, idx: __m512i, k: __mmask32, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); - transmute(simd_select_bitmask(k, permute, idx.as_i16x32())) + unsafe { + let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); + transmute(simd_select_bitmask(k, permute, idx.as_i16x32())) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
@@ -7334,8 +7684,8 @@ pub unsafe fn _mm512_mask2_permutex2var_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w -pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { - transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) +pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -7345,14 +7695,16 @@ pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2w))] -pub unsafe fn _mm256_mask_permutex2var_epi16( +pub fn _mm256_mask_permutex2var_epi16( a: __m256i, k: __mmask16, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); - transmute(simd_select_bitmask(k, permute, a.as_i16x16())) + unsafe { + let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); + transmute(simd_select_bitmask(k, permute, a.as_i16x16())) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7362,14 +7714,16 @@ pub unsafe fn _mm256_mask_permutex2var_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w -pub unsafe fn _mm256_maskz_permutex2var_epi16( +pub fn _mm256_maskz_permutex2var_epi16( k: __mmask16, a: __m256i, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); - transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) + unsafe { + let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); + transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -7379,14 +7733,16 @@ pub unsafe fn _mm256_maskz_permutex2var_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2w))] -pub unsafe fn _mm256_mask2_permutex2var_epi16( +pub fn _mm256_mask2_permutex2var_epi16( a: __m256i, idx: __m256i, k: __mmask16, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); - transmute(simd_select_bitmask(k, permute, idx.as_i16x16())) + unsafe { + let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); + transmute(simd_select_bitmask(k, permute, idx.as_i16x16())) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
@@ -7396,8 +7752,8 @@ pub unsafe fn _mm256_mask2_permutex2var_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w -pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { - transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) +pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -7407,14 +7763,11 @@ pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2w))] -pub unsafe fn _mm_mask_permutex2var_epi16( - a: __m128i, - k: __mmask8, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); - transmute(simd_select_bitmask(k, permute, a.as_i16x8())) +pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); + transmute(simd_select_bitmask(k, permute, a.as_i16x8())) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7424,14 +7777,11 @@ pub unsafe fn _mm_mask_permutex2var_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w -pub unsafe fn _mm_maskz_permutex2var_epi16( - k: __mmask8, - a: __m128i, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); - transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) +pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); + transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) + } } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -7441,14 +7791,11 @@ pub unsafe fn _mm_maskz_permutex2var_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2w))] -pub unsafe fn _mm_mask2_permutex2var_epi16( - a: __m128i, - idx: __m128i, - k: __mmask8, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); - transmute(simd_select_bitmask(k, permute, idx.as_i16x8())) +pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); + transmute(simd_select_bitmask(k, permute, idx.as_i16x8())) + } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
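Reviewer note (illustrative, not part of the diff): the permutex2var family picks each destination lane from the concatenation of a and b; for the 128-bit epi16 form, bits [2:0] of each idx element select the lane and bit 3 selects the source (0 = a, 1 = b). The sketch below assumes the same nightly feature gate; the helper name and selector values are made up.

use core::arch::x86_64::*;

// Hypothetical demo, not part of this change: interleave the low four
// 16-bit lanes of a and b, i.e. [a0, b0, a1, b1, a2, b2, a3, b3].
#[target_feature(enable = "avx512bw,avx512vl")]
fn interleave_low_words(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0..=7 read from a, 8..=15 read from b
    // (_mm_set_epi16 lists lanes from highest to lowest).
    let idx = _mm_set_epi16(11, 3, 10, 2, 9, 1, 8, 0);
    _mm_permutex2var_epi16(a, idx, b)
}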
@@ -7458,8 +7805,8 @@ pub unsafe fn _mm_mask2_permutex2var_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i { - transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) +pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7469,14 +7816,16 @@ pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm512_mask_permutexvar_epi16( +pub fn _mm512_mask_permutexvar_epi16( src: __m512i, k: __mmask32, idx: __m512i, a: __m512i, ) -> __m512i { - let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); - transmute(simd_select_bitmask(k, permute, src.as_i16x32())) + unsafe { + let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); + transmute(simd_select_bitmask(k, permute, src.as_i16x32())) + } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7486,9 +7835,11 @@ pub unsafe fn _mm512_mask_permutexvar_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i { - let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); - transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) +pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); + transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) + } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -7498,8 +7849,8 @@ pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m5 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i { - transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) +pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i { + unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -7509,14 +7860,16 @@ pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm256_mask_permutexvar_epi16( +pub fn _mm256_mask_permutexvar_epi16( src: __m256i, k: __mmask16, idx: __m256i, a: __m256i, ) -> __m256i { - let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); - transmute(simd_select_bitmask(k, permute, src.as_i16x16())) + unsafe { + let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); + transmute(simd_select_bitmask(k, permute, src.as_i16x16())) + } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7526,9 +7879,11 @@ pub unsafe fn _mm256_mask_permutexvar_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i { - let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); - transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) +pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); + transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) + } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -7538,8 +7893,8 @@ pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m2 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i { - transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) +pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i { + unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7549,14 +7904,11 @@ pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm_mask_permutexvar_epi16( - src: __m128i, - k: __mmask8, - idx: __m128i, - a: __m128i, -) -> __m128i { - let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); - transmute(simd_select_bitmask(k, permute, src.as_i16x8())) +pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); + transmute(simd_select_bitmask(k, permute, src.as_i16x8())) + } } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
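Reviewer note (illustrative, not part of the diff): unlike permutex2var, permutexvar shuffles a single source across lanes, so idx only names lanes of a (only the low 3 bits of each index are used in the 128-bit form). A small sketch under the same nightly-feature assumptions; the helper name is made up.

use core::arch::x86_64::*;

// Hypothetical demo, not part of this change: reverse the eight 16-bit lanes.
#[target_feature(enable = "avx512bw,avx512vl")]
fn reverse_words(a: __m128i) -> __m128i {
    // idx lane i names the lane of a that ends up in result lane i.
    let idx = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    _mm_permutexvar_epi16(idx, a)
}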
@@ -7566,9 +7918,11 @@ pub unsafe fn _mm_mask_permutexvar_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermw))] -pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { - let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); - transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) +pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); + transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) + } } /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. @@ -7578,8 +7932,8 @@ pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw -pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) +pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) } } /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. @@ -7589,8 +7943,8 @@ pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw -pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) +pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) } } /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. @@ -7600,8 +7954,8 @@ pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw -pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) +pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) } } /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. @@ -7611,8 +7965,8 @@ pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb -pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) +pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) } } /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. 
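Reviewer note (illustrative, not part of the diff): the blend intrinsics are a pure per-lane select on the mask, matching the simd_select_bitmask bodies above: a set bit takes the lane from b, a clear bit keeps a. Sketch under the same nightly-feature assumptions; the helper name and mask are made up.

use core::arch::x86_64::*;

// Hypothetical demo, not part of this change.
#[target_feature(enable = "avx512bw,avx512vl")]
fn blend_low_from_b(a: __m128i, b: __m128i) -> __m128i {
    // Low four 16-bit lanes come from b, high four stay from a.
    _mm_mask_blend_epi16(0b0000_1111, a, b)
}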
@@ -7622,8 +7976,8 @@ pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb -pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) +pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) } } /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. @@ -7633,8 +7987,8 @@ pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb -pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) +pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) } } /// Broadcast the low packed 16-bit integer from a to all elements of dst. @@ -7644,17 +7998,19 @@ pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { - let a = _mm512_castsi128_si512(a).as_i16x32(); - let ret: i16x32 = simd_shuffle!( - a, - a, - [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, - ], - ); - transmute(ret) +pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { + unsafe { + let a = _mm512_castsi128_si512(a).as_i16x32(); + let ret: i16x32 = simd_shuffle!( + a, + a, + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + ], + ); + transmute(ret) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7664,9 +8020,11 @@ pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, broadcast, src.as_i16x32())) +pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, broadcast, src.as_i16x32())) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7676,9 +8034,11 @@ pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO)) +pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO)) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7688,9 +8048,11 @@ pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, broadcast, src.as_i16x16())) +pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i16x16())) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7700,9 +8062,11 @@ pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO)) +pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO)) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7712,9 +8076,11 @@ pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_i16x8())) +pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i16x8())) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7724,9 +8090,11 @@ pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO)) +pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO)) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst. @@ -7736,18 +8104,20 @@ pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { - let a = _mm512_castsi128_si512(a).as_i8x64(); - let ret: i8x64 = simd_shuffle!( - a, - a, - [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - ], - ); - transmute(ret) +pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { + unsafe { + let a = _mm512_castsi128_si512(a).as_i8x64(); + let ret: i8x64 = simd_shuffle!( + a, + a, + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ], + ); + transmute(ret) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7757,9 +8127,11 @@ pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); - transmute(simd_select_bitmask(k, broadcast, src.as_i8x64())) +pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, broadcast, src.as_i8x64())) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7769,9 +8141,11 @@ pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); - transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO)) +pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO)) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7781,9 +8155,11 @@ pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, broadcast, src.as_i8x32())) +pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, broadcast, src.as_i8x32())) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7793,9 +8169,11 @@ pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO)) +pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO)) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7805,9 +8183,11 @@ pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, broadcast, src.as_i8x16())) +pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i8x16())) + } } /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
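Reviewer note (illustrative, not part of the diff): the masked broadcasts splat the low element of the 128-bit source and then apply the usual writemask/zeromask select. A sketch under the same nightly-feature assumptions; the helper name, mask, and constant are made up.

use core::arch::x86_64::*;

// Hypothetical demo, not part of this change: splat 0x7f into the byte lanes
// whose mask bit is set; the other lanes are copied from src.
#[target_feature(enable = "avx512bw")]
fn broadcast_some_bytes(src: __m512i) -> __m512i {
    let a = _mm_set1_epi8(0x7f);
    _mm512_mask_broadcastb_epi8(src, 0x00ff_00ff_00ff_00ff, a)
}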
@@ -7817,9 +8197,11 @@ pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastb))] -pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO)) +pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO)) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -7829,25 +8211,27 @@ pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i16x32(); - let b = b.as_i16x32(); - #[rustfmt::skip] - let r: i16x32 = simd_shuffle!( - a, - b, - [ - 4, 32 + 4, 5, 32 + 5, - 6, 32 + 6, 7, 32 + 7, - 12, 32 + 12, 13, 32 + 13, - 14, 32 + 14, 15, 32 + 15, - 20, 32 + 20, 21, 32 + 21, - 22, 32 + 22, 23, 32 + 23, - 28, 32 + 28, 29, 32 + 29, - 30, 32 + 30, 31, 32 + 31, - ], - ); - transmute(r) +pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + #[rustfmt::skip] + let r: i16x32 = simd_shuffle!( + a, + b, + [ + 4, 32 + 4, 5, 32 + 5, + 6, 32 + 6, 7, 32 + 7, + 12, 32 + 12, 13, 32 + 13, + 14, 32 + 14, 15, 32 + 15, + 20, 32 + 20, 21, 32 + 21, + 22, 32 + 22, 23, 32 + 23, + 28, 32 + 28, 29, 32 + 29, + 30, 32 + 30, 31, 32 + 31, + ], + ); + transmute(r) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7857,14 +8241,11 @@ pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm512_mask_unpackhi_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32())) +pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32())) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -7874,9 +8255,11 @@ pub unsafe fn _mm512_mask_unpackhi_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO)) +pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO)) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7886,14 +8269,11 @@ pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm256_mask_unpackhi_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16())) +pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16())) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7903,9 +8283,11 @@ pub unsafe fn _mm256_mask_unpackhi_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO)) +pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO)) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -7915,14 +8297,11 @@ pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm_mask_unpackhi_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8())) +pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8())) + } } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7932,9 +8311,11 @@ pub unsafe fn _mm_mask_unpackhi_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhwd))] -pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO)) +pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO)) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -7944,33 +8325,35 @@ pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i8x64(); - let b = b.as_i8x64(); - #[rustfmt::skip] - let r: i8x64 = simd_shuffle!( - a, - b, - [ - 8, 64+8, 9, 64+9, - 10, 64+10, 11, 64+11, - 12, 64+12, 13, 64+13, - 14, 64+14, 15, 64+15, - 24, 64+24, 25, 64+25, - 26, 64+26, 27, 64+27, - 28, 64+28, 29, 64+29, - 30, 64+30, 31, 64+31, - 40, 64+40, 41, 64+41, - 42, 64+42, 43, 64+43, - 44, 64+44, 45, 64+45, - 46, 64+46, 47, 64+47, - 56, 64+56, 57, 64+57, - 58, 64+58, 59, 64+59, - 60, 64+60, 61, 64+61, - 62, 64+62, 63, 64+63, - ], - ); - transmute(r) +pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + #[rustfmt::skip] + let r: i8x64 = simd_shuffle!( + a, + b, + [ + 8, 64 + 8, 9, 64 + 9, + 10, 64 + 10, 11, 64 + 11, + 12, 64 + 12, 13, 64 + 13, + 14, 64 + 14, 15, 64 + 15, + 24, 64 + 24, 25, 64 + 25, + 26, 64 + 26, 27, 64 + 27, + 28, 64 + 28, 29, 64 + 29, + 30, 64 + 30, 31, 64 + 31, + 40, 64 + 40, 41, 64 + 41, + 42, 64 + 42, 43, 64 + 43, + 44, 64 + 44, 45, 64 + 45, + 46, 64 + 46, 47, 64 + 47, + 56, 64 + 56, 57, 64 + 57, + 58, 64 + 58, 59, 64 + 59, + 60, 64 + 60, 61, 64 + 61, + 62, 64 + 62, 63, 64 + 63, + ], + ); + transmute(r) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -7980,14 +8363,11 @@ pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm512_mask_unpackhi_epi8( - src: __m512i, - k: __mmask64, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64())) +pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64())) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -7997,9 +8377,11 @@ pub unsafe fn _mm512_mask_unpackhi_epi8( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO)) +pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO)) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8009,14 +8391,11 @@ pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm256_mask_unpackhi_epi8( - src: __m256i, - k: __mmask32, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32())) +pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32())) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -8026,9 +8405,11 @@ pub unsafe fn _mm256_mask_unpackhi_epi8( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO)) +pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO)) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8038,14 +8419,11 @@ pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm_mask_unpackhi_epi8( - src: __m128i, - k: __mmask16, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16())) +pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16())) + } } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8055,9 +8433,11 @@ pub unsafe fn _mm_mask_unpackhi_epi8( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhbw))] -pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO)) +pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO)) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. 
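Reviewer note (illustrative, not part of the diff): the unpackhi family interleaves the upper half of each 128-bit lane of a and b before the mask select, which is why the 512-bit shuffle indices above repeat the same 8-lane pattern per lane. Sketch under the same nightly-feature assumptions; the helper name and mask are made up.

use core::arch::x86_64::*;

// Hypothetical demo, not part of this change: result lanes before masking are
// [a4, b4, a5, b5, a6, b6, a7, b7]; the clear mask bits 4 and 5 zero a6 and b6.
#[target_feature(enable = "avx512bw,avx512vl")]
fn unpackhi_masked(a: __m128i, b: __m128i) -> __m128i {
    _mm_maskz_unpackhi_epi16(0b1100_1111, a, b)
}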
@@ -8067,25 +8447,27 @@ pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i16x32(); - let b = b.as_i16x32(); - #[rustfmt::skip] - let r: i16x32 = simd_shuffle!( - a, - b, - [ - 0, 32+0, 1, 32+1, - 2, 32+2, 3, 32+3, - 8, 32+8, 9, 32+9, - 10, 32+10, 11, 32+11, - 16, 32+16, 17, 32+17, - 18, 32+18, 19, 32+19, - 24, 32+24, 25, 32+25, - 26, 32+26, 27, 32+27 - ], - ); - transmute(r) +pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i16x32(); + let b = b.as_i16x32(); + #[rustfmt::skip] + let r: i16x32 = simd_shuffle!( + a, + b, + [ + 0, 32+0, 1, 32+1, + 2, 32+2, 3, 32+3, + 8, 32+8, 9, 32+9, + 10, 32+10, 11, 32+11, + 16, 32+16, 17, 32+17, + 18, 32+18, 19, 32+19, + 24, 32+24, 25, 32+25, + 26, 32+26, 27, 32+27 + ], + ); + transmute(r) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8095,14 +8477,11 @@ pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm512_mask_unpacklo_epi16( - src: __m512i, - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32())) +pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32())) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8112,9 +8491,11 @@ pub unsafe fn _mm512_mask_unpacklo_epi16( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { - let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO)) +pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO)) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8124,14 +8505,11 @@ pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm256_mask_unpacklo_epi16( - src: __m256i, - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16())) +pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16())) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8141,9 +8519,11 @@ pub unsafe fn _mm256_mask_unpacklo_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { - let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO)) +pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO)) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8153,14 +8533,11 @@ pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm_mask_unpacklo_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8())) +pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8())) + } } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8170,9 +8547,11 @@ pub unsafe fn _mm_mask_unpacklo_epi16( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklwd))] -pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO)) +pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO)) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. 
@@ -8182,33 +8561,35 @@ pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i8x64(); - let b = b.as_i8x64(); - #[rustfmt::skip] - let r: i8x64 = simd_shuffle!( - a, - b, - [ - 0, 64+0, 1, 64+1, - 2, 64+2, 3, 64+3, - 4, 64+4, 5, 64+5, - 6, 64+6, 7, 64+7, - 16, 64+16, 17, 64+17, - 18, 64+18, 19, 64+19, - 20, 64+20, 21, 64+21, - 22, 64+22, 23, 64+23, - 32, 64+32, 33, 64+33, - 34, 64+34, 35, 64+35, - 36, 64+36, 37, 64+37, - 38, 64+38, 39, 64+39, - 48, 64+48, 49, 64+49, - 50, 64+50, 51, 64+51, - 52, 64+52, 53, 64+53, - 54, 64+54, 55, 64+55, - ], - ); - transmute(r) +pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i8x64(); + let b = b.as_i8x64(); + #[rustfmt::skip] + let r: i8x64 = simd_shuffle!( + a, + b, + [ + 0, 64+0, 1, 64+1, + 2, 64+2, 3, 64+3, + 4, 64+4, 5, 64+5, + 6, 64+6, 7, 64+7, + 16, 64+16, 17, 64+17, + 18, 64+18, 19, 64+19, + 20, 64+20, 21, 64+21, + 22, 64+22, 23, 64+23, + 32, 64+32, 33, 64+33, + 34, 64+34, 35, 64+35, + 36, 64+36, 37, 64+37, + 38, 64+38, 39, 64+39, + 48, 64+48, 49, 64+49, + 50, 64+50, 51, 64+51, + 52, 64+52, 53, 64+53, + 54, 64+54, 55, 64+55, + ], + ); + transmute(r) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8218,14 +8599,11 @@ pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm512_mask_unpacklo_epi8( - src: __m512i, - k: __mmask64, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64())) +pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64())) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8235,9 +8613,11 @@ pub unsafe fn _mm512_mask_unpacklo_epi8( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO)) +pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO)) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8247,14 +8627,11 @@ pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm256_mask_unpacklo_epi8( - src: __m256i, - k: __mmask32, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32())) +pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32())) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8264,9 +8641,11 @@ pub unsafe fn _mm256_mask_unpacklo_epi8( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO)) +pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO)) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8276,14 +8655,11 @@ pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm_mask_unpacklo_epi8( - src: __m128i, - k: __mmask16, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16())) +pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16())) + } } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8293,9 +8669,11 @@ pub unsafe fn _mm_mask_unpacklo_epi8( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklbw))] -pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO)) +pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO)) + } } /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
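Reviewer note (illustrative, not part of the diff): unpacklo mirrors unpackhi but interleaves the low half of each 128-bit lane; the masked forms then copy from src (writemask) or zero (zeromask) wherever a bit is clear. Sketch under the same nightly-feature assumptions; the helper name and mask are made up.

use core::arch::x86_64::*;

// Hypothetical demo, not part of this change: interleave the low eight bytes
// of each 128-bit lane of a and b, keeping src in the top eight result byte
// lanes (their mask bits are clear).
#[target_feature(enable = "avx512bw")]
fn unpacklo_masked(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
    _mm512_mask_unpacklo_epi8(src, !0u64 >> 8, a, b)
}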
@@ -8305,9 +8683,11 @@ pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] -pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { - let mov = a.as_i16x32(); - transmute(simd_select_bitmask(k, mov, src.as_i16x32())) +pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i16x32(); + transmute(simd_select_bitmask(k, mov, src.as_i16x32())) + } } /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8317,9 +8697,11 @@ pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] -pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { - let mov = a.as_i16x32(); - transmute(simd_select_bitmask(k, mov, i16x32::ZERO)) +pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i16x32(); + transmute(simd_select_bitmask(k, mov, i16x32::ZERO)) + } } /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8329,9 +8711,11 @@ pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] -pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { - let mov = a.as_i16x16(); - transmute(simd_select_bitmask(k, mov, src.as_i16x16())) +pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i16x16(); + transmute(simd_select_bitmask(k, mov, src.as_i16x16())) + } } /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8341,9 +8725,11 @@ pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] -pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { - let mov = a.as_i16x16(); - transmute(simd_select_bitmask(k, mov, i16x16::ZERO)) +pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i16x16(); + transmute(simd_select_bitmask(k, mov, i16x16::ZERO)) + } } /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8353,9 +8739,11 @@ pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] -pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let mov = a.as_i16x8(); - transmute(simd_select_bitmask(k, mov, src.as_i16x8())) +pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i16x8(); + transmute(simd_select_bitmask(k, mov, src.as_i16x8())) + } } /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8365,9 +8753,11 @@ pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu16))] -pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { - let mov = a.as_i16x8(); - transmute(simd_select_bitmask(k, mov, i16x8::ZERO)) +pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i16x8(); + transmute(simd_select_bitmask(k, mov, i16x8::ZERO)) + } } /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8377,9 +8767,11 @@ pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] -pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { - let mov = a.as_i8x64(); - transmute(simd_select_bitmask(k, mov, src.as_i8x64())) +pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i8x64(); + transmute(simd_select_bitmask(k, mov, src.as_i8x64())) + } } /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8389,9 +8781,11 @@ pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] -pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { - let mov = a.as_i8x64(); - transmute(simd_select_bitmask(k, mov, i8x64::ZERO)) +pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i8x64(); + transmute(simd_select_bitmask(k, mov, i8x64::ZERO)) + } } /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8401,9 +8795,11 @@ pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] -pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { - let mov = a.as_i8x32(); - transmute(simd_select_bitmask(k, mov, src.as_i8x32())) +pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i8x32(); + transmute(simd_select_bitmask(k, mov, src.as_i8x32())) + } } /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8413,9 +8809,11 @@ pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] -pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { - let mov = a.as_i8x32(); - transmute(simd_select_bitmask(k, mov, i8x32::ZERO)) +pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i8x32(); + transmute(simd_select_bitmask(k, mov, i8x32::ZERO)) + } } /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8425,9 +8823,11 @@ pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] -pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { - let mov = a.as_i8x16(); - transmute(simd_select_bitmask(k, mov, src.as_i8x16())) +pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i8x16(); + transmute(simd_select_bitmask(k, mov, src.as_i8x16())) + } } /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8437,9 +8837,11 @@ pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m12 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqu8))] -pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { - let mov = a.as_i8x16(); - transmute(simd_select_bitmask(k, mov, i8x16::ZERO)) +pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i8x16(); + transmute(simd_select_bitmask(k, mov, i8x16::ZERO)) + } } /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
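The `set1` hunks beginning here combine a broadcast (`vpbroadcastw`) with the same bitmask select. A minimal scalar sketch, using hypothetical names:

```rust
/// Splat `a` to every 16-bit element, then apply the writemask against `src`.
fn mask_set1_epi16_model(src: [i16; 32], k: u32, a: i16) -> [i16; 32] {
    let splat = [a; 32];
    let mut dst = [0i16; 32];
    for i in 0..32 {
        dst[i] = if (k >> i) & 1 == 1 { splat[i] } else { src[i] };
    }
    dst
}
```

For example, `mask_set1_epi16_model(src, 0b0101, 9)` overwrites only elements 0 and 2 with 9 and leaves the rest of `src` untouched.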
@@ -8449,9 +8851,11 @@ pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i { - let r = _mm512_set1_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, r, src.as_i16x32())) +pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i { + unsafe { + let r = _mm512_set1_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, r, src.as_i16x32())) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8461,9 +8865,11 @@ pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m5 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { - let r = _mm512_set1_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, r, i16x32::ZERO)) +pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { + unsafe { + let r = _mm512_set1_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, r, i16x32::ZERO)) + } } /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8473,9 +8879,11 @@ pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i { - let r = _mm256_set1_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, r, src.as_i16x16())) +pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i { + unsafe { + let r = _mm256_set1_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, r, src.as_i16x16())) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8485,9 +8893,11 @@ pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m2 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { - let r = _mm256_set1_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, r, i16x16::ZERO)) +pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { + unsafe { + let r = _mm256_set1_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, r, i16x16::ZERO)) + } } /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8497,9 +8907,11 @@ pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i { - let r = _mm_set1_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, r, src.as_i16x8())) +pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i { + unsafe { + let r = _mm_set1_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, r, src.as_i16x8())) + } } /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8509,9 +8921,11 @@ pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastw))] -pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { - let r = _mm_set1_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, r, i16x8::ZERO)) +pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { + unsafe { + let r = _mm_set1_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, r, i16x8::ZERO)) + } } /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8521,9 +8935,11 @@ pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] -pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i { - let r = _mm512_set1_epi8(a).as_i8x64(); - transmute(simd_select_bitmask(k, r, src.as_i8x64())) +pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i { + unsafe { + let r = _mm512_set1_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, r, src.as_i8x64())) + } } /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8533,9 +8949,11 @@ pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512 #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] -pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { - let r = _mm512_set1_epi8(a).as_i8x64(); - transmute(simd_select_bitmask(k, r, i8x64::ZERO)) +pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { + unsafe { + let r = _mm512_set1_epi8(a).as_i8x64(); + transmute(simd_select_bitmask(k, r, i8x64::ZERO)) + } } /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8545,9 +8963,11 @@ pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] -pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i { - let r = _mm256_set1_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, r, src.as_i8x32())) +pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i { + unsafe { + let r = _mm256_set1_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, r, src.as_i8x32())) + } } /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8557,9 +8977,11 @@ pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256 #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] -pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { - let r = _mm256_set1_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, r, i8x32::ZERO)) +pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { + unsafe { + let r = _mm256_set1_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, r, i8x32::ZERO)) + } } /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8569,9 +8991,11 @@ pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] -pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { - let r = _mm_set1_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, r, src.as_i8x16())) +pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { + unsafe { + let r = _mm_set1_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, r, src.as_i8x16())) + } } /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8581,9 +9005,11 @@ pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] -pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { - let r = _mm_set1_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, r, i8x16::ZERO)) +pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { + unsafe { + let r = _mm_set1_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, r, i8x16::ZERO)) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst. 
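As an aid for checking the long index table in the next hunk: the `vpshuflw` decode can be summarized by this scalar sketch (my own helper, assuming the same two-bit-field reading of `IMM8` as the shuffle indices in the diff):

```rust
/// Within each 128-bit lane (8 x i16), permute the low four words by two-bit
/// fields of `imm8` and copy the high four words through unchanged.
fn shufflelo_epi16_model(a: [i16; 32], imm8: u8) -> [i16; 32] {
    let sel = |n: usize| ((imm8 >> (2 * n)) & 0b11) as usize;
    let mut dst = [0i16; 32];
    for lane in 0..4 {
        let base = lane * 8;
        for j in 0..4 {
            dst[base + j] = a[base + sel(j)]; // low 64 bits: shuffled
        }
        for j in 4..8 {
            dst[base + j] = a[base + j]; // high 64 bits: copied from a
        }
    }
    dst
}
```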
@@ -8594,48 +9020,50 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_shufflelo_epi16(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x32(); - let r: i16x32 = simd_shuffle!( - a, - a, - [ - IMM8 as u32 & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - 4, - 5, - 6, - 7, - (IMM8 as u32 & 0b11) + 8, - ((IMM8 as u32 >> 2) & 0b11) + 8, - ((IMM8 as u32 >> 4) & 0b11) + 8, - ((IMM8 as u32 >> 6) & 0b11) + 8, - 12, - 13, - 14, - 15, - (IMM8 as u32 & 0b11) + 16, - ((IMM8 as u32 >> 2) & 0b11) + 16, - ((IMM8 as u32 >> 4) & 0b11) + 16, - ((IMM8 as u32 >> 6) & 0b11) + 16, - 20, - 21, - 22, - 23, - (IMM8 as u32 & 0b11) + 24, - ((IMM8 as u32 >> 2) & 0b11) + 24, - ((IMM8 as u32 >> 4) & 0b11) + 24, - ((IMM8 as u32 >> 6) & 0b11) + 24, - 28, - 29, - 30, - 31, - ], - ); - transmute(r) +pub fn _mm512_shufflelo_epi16(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i16x32(); + let r: i16x32 = simd_shuffle!( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + 4, + 5, + 6, + 7, + (IMM8 as u32 & 0b11) + 8, + ((IMM8 as u32 >> 2) & 0b11) + 8, + ((IMM8 as u32 >> 4) & 0b11) + 8, + ((IMM8 as u32 >> 6) & 0b11) + 8, + 12, + 13, + 14, + 15, + (IMM8 as u32 & 0b11) + 16, + ((IMM8 as u32 >> 2) & 0b11) + 16, + ((IMM8 as u32 >> 4) & 0b11) + 16, + ((IMM8 as u32 >> 6) & 0b11) + 16, + 20, + 21, + 22, + 23, + (IMM8 as u32 & 0b11) + 24, + ((IMM8 as u32 >> 2) & 0b11) + 24, + ((IMM8 as u32 >> 4) & 0b11) + 24, + ((IMM8 as u32 >> 6) & 0b11) + 24, + 28, + 29, + 30, + 31, + ], + ); + transmute(r) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8646,14 +9074,16 @@ pub unsafe fn _mm512_shufflelo_epi16(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_shufflelo_epi16( +pub fn _mm512_mask_shufflelo_epi16( src: __m512i, k: __mmask32, a: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_shufflelo_epi16::(a); - transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflelo_epi16::(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -8664,10 +9094,12 @@ pub unsafe fn _mm512_mask_shufflelo_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_shufflelo_epi16::(a); - transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) +pub fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflelo_epi16::(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8678,14 +9110,16 @@ pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_shufflelo_epi16( +pub fn _mm256_mask_shufflelo_epi16( src: __m256i, k: __mmask16, a: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm256_shufflelo_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflelo_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8696,10 +9130,12 @@ pub unsafe fn _mm256_mask_shufflelo_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm256_shufflelo_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) +pub fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflelo_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8710,14 +9146,12 @@ pub unsafe fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_shufflelo_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm_shufflelo_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) +pub fn _mm_mask_shufflelo_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflelo_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) + } } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8728,10 +9162,12 @@ pub unsafe fn _mm_mask_shufflelo_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm_shufflelo_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) +pub fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflelo_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) + } } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst. @@ -8742,66 +9178,70 @@ pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_shufflehi_epi16(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x32(); - let r: i16x32 = simd_shuffle!( - a, - a, - [ - 0, - 1, - 2, - 3, - (IMM8 as u32 & 0b11) + 4, - ((IMM8 as u32 >> 2) & 0b11) + 4, - ((IMM8 as u32 >> 4) & 0b11) + 4, - ((IMM8 as u32 >> 6) & 0b11) + 4, - 8, - 9, - 10, - 11, - (IMM8 as u32 & 0b11) + 12, - ((IMM8 as u32 >> 2) & 0b11) + 12, - ((IMM8 as u32 >> 4) & 0b11) + 12, - ((IMM8 as u32 >> 6) & 0b11) + 12, - 16, - 17, - 18, - 19, - (IMM8 as u32 & 0b11) + 20, - ((IMM8 as u32 >> 2) & 0b11) + 20, - ((IMM8 as u32 >> 4) & 0b11) + 20, - ((IMM8 as u32 >> 6) & 0b11) + 20, - 24, - 25, - 26, - 27, - (IMM8 as u32 & 0b11) + 28, - ((IMM8 as u32 >> 2) & 0b11) + 28, - ((IMM8 as u32 >> 4) & 0b11) + 28, - ((IMM8 as u32 >> 6) & 0b11) + 28, - ], - ); - transmute(r) -} - -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). 
-/// +pub fn _mm512_shufflehi_epi16(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i16x32(); + let r: i16x32 = simd_shuffle!( + a, + a, + [ + 0, + 1, + 2, + 3, + (IMM8 as u32 & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + 8, + 9, + 10, + 11, + (IMM8 as u32 & 0b11) + 12, + ((IMM8 as u32 >> 2) & 0b11) + 12, + ((IMM8 as u32 >> 4) & 0b11) + 12, + ((IMM8 as u32 >> 6) & 0b11) + 12, + 16, + 17, + 18, + 19, + (IMM8 as u32 & 0b11) + 20, + ((IMM8 as u32 >> 2) & 0b11) + 20, + ((IMM8 as u32 >> 4) & 0b11) + 20, + ((IMM8 as u32 >> 6) & 0b11) + 20, + 24, + 25, + 26, + 27, + (IMM8 as u32 & 0b11) + 28, + ((IMM8 as u32 >> 2) & 0b11) + 28, + ((IMM8 as u32 >> 4) & 0b11) + 28, + ((IMM8 as u32 >> 6) & 0b11) + 28, + ], + ); + transmute(r) + } +} + +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210) #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_shufflehi_epi16( +pub fn _mm512_mask_shufflehi_epi16( src: __m512i, k: __mmask32, a: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_shufflehi_epi16::(a); - transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) + } } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8812,10 +9252,12 @@ pub unsafe fn _mm512_mask_shufflehi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_shufflehi_epi16::(a); - transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) +pub fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) + } } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8826,14 +9268,16 @@ pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_shufflehi_epi16( +pub fn _mm256_mask_shufflehi_epi16( src: __m256i, k: __mmask16, a: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm256_shufflehi_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) + } } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8844,10 +9288,12 @@ pub unsafe fn _mm256_mask_shufflehi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm256_shufflehi_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) +pub fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm256_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) + } } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8858,14 +9304,12 @@ pub unsafe fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_shufflehi_epi16( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm_shufflehi_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) +pub fn _mm_mask_shufflehi_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) + } } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
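The `vpshufhw` hunks are the mirror image of the shufflelo sketch above: per 128-bit lane the low four words pass through and the high four are permuted. Combined with the zeromask described in the doc comment just above, a rough scalar model (illustrative only) looks like this:

```rust
fn maskz_shufflehi_epi16_model(k: u32, a: [i16; 32], imm8: u8) -> [i16; 32] {
    let sel = |n: usize| ((imm8 >> (2 * n)) & 0b11) as usize;
    let mut dst = [0i16; 32];
    for lane in 0..4 {
        let base = lane * 8;
        for j in 0..4 {
            dst[base + j] = a[base + j]; // low 64 bits copied from a
            dst[base + 4 + j] = a[base + 4 + sel(j)]; // high 64 bits shuffled
        }
    }
    for i in 0..32 {
        if (k >> i) & 1 == 0 {
            dst[i] = 0; // zeromask: clear elements whose mask bit is unset
        }
    }
    dst
}
```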
@@ -8876,10 +9320,12 @@ pub unsafe fn _mm_mask_shufflehi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shuffle = _mm_shufflehi_epi16::(a); - transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) +pub fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shuffle = _mm_shufflehi_epi16::(a); + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) + } } /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst. @@ -8889,8 +9335,8 @@ pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i { - transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) } } /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8900,14 +9346,11 @@ pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm512_mask_shuffle_epi8( - src: __m512i, - k: __mmask64, - a: __m512i, - b: __m512i, -) -> __m512i { - let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, shuffle, src.as_i8x64())) +pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, shuffle, src.as_i8x64())) + } } /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8917,9 +9360,11 @@ pub unsafe fn _mm512_mask_shuffle_epi8( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO)) +pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO)) + } } /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
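For the `vpshufb` hunks below, the control-byte semantics are easy to lose in the mask plumbing, so here is a plain-Rust model of the unmasked `_mm512_shuffle_epi8` itself (illustrative; the masked variants then apply the usual bitmask select on top):

```rust
/// Per byte: if the control byte's high bit is set the result is zero,
/// otherwise its low four bits index into the same 16-byte lane of `a`.
fn shuffle_epi8_model(a: [u8; 64], b: [u8; 64]) -> [u8; 64] {
    let mut dst = [0u8; 64];
    for i in 0..64 {
        let lane_base = (i / 16) * 16; // start of this 128-bit lane
        let ctrl = b[i];
        dst[i] = if ctrl & 0x80 != 0 {
            0
        } else {
            a[lane_base + (ctrl & 0x0F) as usize]
        };
    }
    dst
}
```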
@@ -8929,14 +9374,11 @@ pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm256_mask_shuffle_epi8( - src: __m256i, - k: __mmask32, - a: __m256i, - b: __m256i, -) -> __m256i { - let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, shuffle, src.as_i8x32())) +pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, shuffle, src.as_i8x32())) + } } /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -8946,9 +9388,11 @@ pub unsafe fn _mm256_mask_shuffle_epi8( #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO)) +pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO)) + } } /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8958,9 +9402,11 @@ pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, shuffle, src.as_i8x16())) +pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, shuffle, src.as_i8x16())) + } } /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -8970,9 +9416,11 @@ pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufb))] -pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO)) +pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO)) + } } /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. @@ -8982,7 +9430,7 @@ pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmw))] -pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpneq_epi16_mask(and, zero) @@ -8995,7 +9443,7 @@ pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmw))] -pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpneq_epi16_mask(k, and, zero) @@ -9008,7 +9456,7 @@ pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmw))] -pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpneq_epi16_mask(and, zero) @@ -9021,7 +9469,7 @@ pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmw))] -pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpneq_epi16_mask(k, and, zero) @@ -9034,7 +9482,7 @@ pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmw))] -pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpneq_epi16_mask(and, zero) @@ -9047,7 +9495,7 @@ pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = 
"avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmw))] -pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpneq_epi16_mask(k, and, zero) @@ -9060,7 +9508,7 @@ pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmb))] -pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpneq_epi8_mask(and, zero) @@ -9073,7 +9521,7 @@ pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmb))] -pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpneq_epi8_mask(k, and, zero) @@ -9086,7 +9534,7 @@ pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmb))] -pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpneq_epi8_mask(and, zero) @@ -9099,7 +9547,7 @@ pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmb))] -pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpneq_epi8_mask(k, and, zero) @@ -9112,7 +9560,7 @@ pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmb))] -pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpneq_epi8_mask(and, zero) @@ -9125,7 +9573,7 @@ pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmb))] -pub unsafe fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpneq_epi8_mask(k, and, zero) @@ -9138,7 +9586,7 @@ pub unsafe fn 
_mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmw))] -pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpeq_epi16_mask(and, zero) @@ -9151,7 +9599,7 @@ pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmw))] -pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { +pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpeq_epi16_mask(k, and, zero) @@ -9164,7 +9612,7 @@ pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmw))] -pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpeq_epi16_mask(and, zero) @@ -9177,7 +9625,7 @@ pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmw))] -pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { +pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpeq_epi16_mask(k, and, zero) @@ -9190,7 +9638,7 @@ pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmw))] -pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpeq_epi16_mask(and, zero) @@ -9203,7 +9651,7 @@ pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmw))] -pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpeq_epi16_mask(k, and, zero) @@ -9216,7 +9664,7 @@ pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmb))] -pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); 
_mm512_cmpeq_epi8_mask(and, zero) @@ -9229,7 +9677,7 @@ pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmb))] -pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { +pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { let and = _mm512_and_si512(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpeq_epi8_mask(k, and, zero) @@ -9242,7 +9690,7 @@ pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmb))] -pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpeq_epi8_mask(and, zero) @@ -9255,7 +9703,7 @@ pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmb))] -pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { +pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpeq_epi8_mask(k, and, zero) @@ -9268,7 +9716,7 @@ pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmb))] -pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpeq_epi8_mask(and, zero) @@ -9281,7 +9729,7 @@ pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmb))] -pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { +pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpeq_epi8_mask(k, and, zero) @@ -9338,8 +9786,8 @@ pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsadbw))] -pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { - transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) +pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. 
Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9350,12 +9798,14 @@ pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm512_dbsad_epu8(a: __m512i, b: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x64(); - let b = b.as_u8x64(); - let r = vdbpsadbw(a, b, IMM8); - transmute(r) +pub fn _mm512_dbsad_epu8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = vdbpsadbw(a, b, IMM8); + transmute(r) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9366,17 +9816,19 @@ pub unsafe fn _mm512_dbsad_epu8(a: __m512i, b: __m512i) -> __m5 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(4)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm512_mask_dbsad_epu8( +pub fn _mm512_mask_dbsad_epu8( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x64(); - let b = b.as_u8x64(); - let r = vdbpsadbw(a, b, IMM8); - transmute(simd_select_bitmask(k, r, src.as_u16x32())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = vdbpsadbw(a, b, IMM8); + transmute(simd_select_bitmask(k, r, src.as_u16x32())) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. 
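The `vdbpsadbw` hunks are the hardest ones in this section to review by eye. Below is a rough scalar rendition of one 128-bit lane, following my reading of Intel's pseudocode (worth double-checking the exact byte offsets against the Intrinsics Guide): `IMM8` first selects a permutation of the four 32-bit quadruplets of `b`, and four SADs per 64-bit half then compare the lower and upper quadruplets of `a` against byte-shifted windows of that selection.

```rust
fn dbsad_epu8_lane_model(a: [u8; 16], b: [u8; 16], imm8: u8) -> [u16; 8] {
    // Select 32-bit quadruplets of `b` according to two-bit fields of imm8.
    let mut tmp = [0u8; 16];
    for q in 0..4 {
        let src_q = ((imm8 >> (2 * q)) & 0b11) as usize;
        tmp[4 * q..4 * q + 4].copy_from_slice(&b[4 * src_q..4 * src_q + 4]);
    }
    // Sum of absolute differences of two 4-byte groups.
    let sad4 = |x: &[u8], y: &[u8]| -> u16 {
        (0..4).map(|i| (x[i] as i16 - y[i] as i16).unsigned_abs()).sum()
    };
    let mut dst = [0u16; 8];
    for half in 0..2 {
        let o = half * 8; // byte offset of this 64-bit half
        dst[4 * half] = sad4(&a[o..o + 4], &tmp[o..o + 4]);
        dst[4 * half + 1] = sad4(&a[o..o + 4], &tmp[o + 1..o + 5]);
        dst[4 * half + 2] = sad4(&a[o + 4..o + 8], &tmp[o + 2..o + 6]);
        dst[4 * half + 3] = sad4(&a[o + 4..o + 8], &tmp[o + 3..o + 7]);
    }
    dst
}
```

The masked variants in these hunks then apply `simd_select_bitmask` to the resulting 16-bit elements exactly as elsewhere in the file.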
@@ -9387,16 +9839,14 @@ pub unsafe fn _mm512_mask_dbsad_epu8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm512_maskz_dbsad_epu8( - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x64(); - let b = b.as_u8x64(); - let r = vdbpsadbw(a, b, IMM8); - transmute(simd_select_bitmask(k, r, u16x32::ZERO)) +pub fn _mm512_maskz_dbsad_epu8(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x64(); + let b = b.as_u8x64(); + let r = vdbpsadbw(a, b, IMM8); + transmute(simd_select_bitmask(k, r, u16x32::ZERO)) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9407,12 +9857,14 @@ pub unsafe fn _mm512_maskz_dbsad_epu8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm256_dbsad_epu8(a: __m256i, b: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x32(); - let b = b.as_u8x32(); - let r = vdbpsadbw256(a, b, IMM8); - transmute(r) +pub fn _mm256_dbsad_epu8(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let r = vdbpsadbw256(a, b, IMM8); + transmute(r) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. 
@@ -9423,17 +9875,19 @@ pub unsafe fn _mm256_dbsad_epu8(a: __m256i, b: __m256i) -> __m2 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(4)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm256_mask_dbsad_epu8( +pub fn _mm256_mask_dbsad_epu8( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x32(); - let b = b.as_u8x32(); - let r = vdbpsadbw256(a, b, IMM8); - transmute(simd_select_bitmask(k, r, src.as_u16x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let r = vdbpsadbw256(a, b, IMM8); + transmute(simd_select_bitmask(k, r, src.as_u16x16())) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9444,16 +9898,14 @@ pub unsafe fn _mm256_mask_dbsad_epu8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm256_maskz_dbsad_epu8( - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x32(); - let b = b.as_u8x32(); - let r = vdbpsadbw256(a, b, IMM8); - transmute(simd_select_bitmask(k, r, u16x16::ZERO)) +pub fn _mm256_maskz_dbsad_epu8(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x32(); + let b = b.as_u8x32(); + let r = vdbpsadbw256(a, b, IMM8); + transmute(simd_select_bitmask(k, r, u16x16::ZERO)) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. 
@@ -9464,12 +9916,14 @@ pub unsafe fn _mm256_maskz_dbsad_epu8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm_dbsad_epu8(a: __m128i, b: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x16(); - let b = b.as_u8x16(); - let r = vdbpsadbw128(a, b, IMM8); - transmute(r) +pub fn _mm_dbsad_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let r = vdbpsadbw128(a, b, IMM8); + transmute(r) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9480,17 +9934,19 @@ pub unsafe fn _mm_dbsad_epu8(a: __m128i, b: __m128i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(4)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm_mask_dbsad_epu8( +pub fn _mm_mask_dbsad_epu8( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x16(); - let b = b.as_u8x16(); - let r = vdbpsadbw128(a, b, IMM8); - transmute(simd_select_bitmask(k, r, src.as_u16x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let r = vdbpsadbw128(a, b, IMM8); + transmute(simd_select_bitmask(k, r, src.as_u16x8())) + } } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9501,16 +9957,14 @@ pub unsafe fn _mm_mask_dbsad_epu8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))] -pub unsafe fn _mm_maskz_dbsad_epu8( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_u8x16(); - let b = b.as_u8x16(); - let r = vdbpsadbw128(a, b, IMM8); - transmute(simd_select_bitmask(k, r, u16x8::ZERO)) +pub fn _mm_maskz_dbsad_epu8(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_u8x16(); + let b = b.as_u8x16(); + let r = vdbpsadbw128(a, b, IMM8); + transmute(simd_select_bitmask(k, r, u16x8::ZERO)) + } } /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. 
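With this conversion the dbsad intrinsics can be called without an `unsafe` block from code that itself enables the required target features (target_feature 1.1); other callers still need runtime detection plus `unsafe`. A minimal sketch, assuming a nightly toolchain with the `stdarch_x86_avx512` gate and a CPU with AVX512BW/AVX512VL; the helper name `dbsad_rows` and the `IMM8 = 0` selection are illustrative only:

    #![feature(stdarch_x86_avx512)] // nightly: these intrinsics are still feature-gated here
    use std::arch::x86_64::*;

    /// Hypothetical helper: SADs of the 8-bit lanes of `a` against quadruplets of `b`.
    /// Safe to call without `unsafe` from code that also enables avx512bw+avx512vl;
    /// other callers need runtime detection plus an `unsafe` block.
    #[target_feature(enable = "avx512bw,avx512vl")]
    pub fn dbsad_rows(a: __m128i, b: __m128i) -> __m128i {
        // IMM8 = 0 selects the lowest 32-bit quadruplet of `b` for every SAD.
        _mm_dbsad_epu8::<0>(a, b)
    }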
@@ -9520,7 +9974,7 @@ pub unsafe fn _mm_maskz_dbsad_epu8( #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovw2m))] -pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 { +pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 { let filter = _mm512_set1_epi16(1 << 15); let a = _mm512_and_si512(a, filter); _mm512_cmpeq_epi16_mask(a, filter) @@ -9533,7 +9987,7 @@ pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovw2m))] -pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 { +pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 { let filter = _mm256_set1_epi16(1 << 15); let a = _mm256_and_si256(a, filter); _mm256_cmpeq_epi16_mask(a, filter) @@ -9546,7 +10000,7 @@ pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovw2m))] -pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 { +pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 { let filter = _mm_set1_epi16(1 << 15); let a = _mm_and_si128(a, filter); _mm_cmpeq_epi16_mask(a, filter) @@ -9559,7 +10013,7 @@ pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovb2m))] -pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 { +pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 { let filter = _mm512_set1_epi8(1 << 7); let a = _mm512_and_si512(a, filter); _mm512_cmpeq_epi8_mask(a, filter) @@ -9573,7 +10027,7 @@ pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than // using vpmovb2m plus converting the mask register to a standard register. -pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 { +pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 { let filter = _mm256_set1_epi8(1 << 7); let a = _mm256_and_si256(a, filter); _mm256_cmpeq_epi8_mask(a, filter) @@ -9587,7 +10041,7 @@ pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than // using vpmovb2m plus converting the mask register to a standard register. 
-pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 { +pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 { let filter = _mm_set1_epi8(1 << 7); let a = _mm_and_si128(a, filter); _mm_cmpeq_epi8_mask(a, filter) @@ -9600,27 +10054,29 @@ pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovm2w))] -pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i { - let one = _mm512_set1_epi16( - 1 << 15 - | 1 << 14 - | 1 << 13 - | 1 << 12 - | 1 << 11 - | 1 << 10 - | 1 << 9 - | 1 << 8 - | 1 << 7 - | 1 << 6 - | 1 << 5 - | 1 << 4 - | 1 << 3 - | 1 << 2 - | 1 << 1 - | 1 << 0, - ) - .as_i16x32(); - transmute(simd_select_bitmask(k, one, i16x32::ZERO)) +pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i { + unsafe { + let one = _mm512_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ) + .as_i16x32(); + transmute(simd_select_bitmask(k, one, i16x32::ZERO)) + } } /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9630,27 +10086,29 @@ pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovm2w))] -pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i { - let one = _mm256_set1_epi16( - 1 << 15 - | 1 << 14 - | 1 << 13 - | 1 << 12 - | 1 << 11 - | 1 << 10 - | 1 << 9 - | 1 << 8 - | 1 << 7 - | 1 << 6 - | 1 << 5 - | 1 << 4 - | 1 << 3 - | 1 << 2 - | 1 << 1 - | 1 << 0, - ) - .as_i16x16(); - transmute(simd_select_bitmask(k, one, i16x16::ZERO)) +pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i { + unsafe { + let one = _mm256_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ) + .as_i16x16(); + transmute(simd_select_bitmask(k, one, i16x16::ZERO)) + } } /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9660,27 +10118,29 @@ pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovm2w))] -pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i { - let one = _mm_set1_epi16( - 1 << 15 - | 1 << 14 - | 1 << 13 - | 1 << 12 - | 1 << 11 - | 1 << 10 - | 1 << 9 - | 1 << 8 - | 1 << 7 - | 1 << 6 - | 1 << 5 - | 1 << 4 - | 1 << 3 - | 1 << 2 - | 1 << 1 - | 1 << 0, - ) - .as_i16x8(); - transmute(simd_select_bitmask(k, one, i16x8::ZERO)) +pub fn _mm_movm_epi16(k: __mmask8) -> __m128i { + unsafe { + let one = _mm_set1_epi16( + 1 << 15 + | 1 << 14 + | 1 << 13 + | 1 << 12 + | 1 << 11 + | 1 << 10 + | 1 << 9 + | 1 << 8 + | 1 << 7 + | 1 << 6 + | 1 << 5 + | 1 << 4 + | 1 << 3 + | 1 << 2 + | 1 << 1 + | 1 << 0, + ) + .as_i16x8(); + transmute(simd_select_bitmask(k, one, i16x8::ZERO)) + } } /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. 
@@ -9690,11 +10150,13 @@ pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovm2b))] -pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i { - let one = - _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) - .as_i8x64(); - transmute(simd_select_bitmask(k, one, i8x64::ZERO)) +pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i { + unsafe { + let one = + _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) + .as_i8x64(); + transmute(simd_select_bitmask(k, one, i8x64::ZERO)) + } } /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9704,11 +10166,13 @@ pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovm2b))] -pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i { - let one = - _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) - .as_i8x32(); - transmute(simd_select_bitmask(k, one, i8x32::ZERO)) +pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i { + unsafe { + let one = + _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) + .as_i8x32(); + transmute(simd_select_bitmask(k, one, i8x32::ZERO)) + } } /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9718,10 +10182,13 @@ pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovm2b))] -pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i { - let one = _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) - .as_i8x16(); - transmute(simd_select_bitmask(k, one, i8x16::ZERO)) +pub fn _mm_movm_epi8(k: __mmask16) -> __m128i { + unsafe { + let one = + _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) + .as_i8x16(); + transmute(simd_select_bitmask(k, one, i8x16::ZERO)) + } } /// Convert 32-bit mask a into an integer value, and store the result in dst. 
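The movepi/movm pairs above convert between the sign bits of packed elements and a mask register. A minimal round-trip sketch under the same assumptions (nightly feature gate, AVX512BW available); `sign_bits_roundtrip` is a hypothetical helper:

    #![feature(stdarch_x86_avx512)] // nightly feature gate
    use std::arch::x86_64::*;

    /// Round trip: MSB of each 16-bit lane -> mask bit -> all-ones/all-zeros lane.
    #[target_feature(enable = "avx512bw")]
    pub fn sign_bits_roundtrip(v: __m512i) -> (__mmask32, __m512i) {
        let k = _mm512_movepi16_mask(v); // bit i = sign bit of 16-bit lane i
        let expanded = _mm512_movm_epi16(k); // lane i = 0xFFFF if bit i is set, else 0
        (k, expanded)
    }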
@@ -9730,7 +10197,7 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtmask32_u32(a: __mmask32) -> u32 { +pub fn _cvtmask32_u32(a: __mmask32) -> u32 { a } @@ -9740,7 +10207,7 @@ pub unsafe fn _cvtmask32_u32(a: __mmask32) -> u32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtu32_mask32(a: u32) -> __mmask32 { +pub fn _cvtu32_mask32(a: u32) -> __mmask32 { a } @@ -9750,7 +10217,7 @@ pub unsafe fn _cvtu32_mask32(a: u32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { +pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { a + b } @@ -9760,7 +10227,7 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { a + b } @@ -9770,7 +10237,7 @@ pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { +pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { a & b } @@ -9780,7 +10247,7 @@ pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { a & b } @@ -9790,7 +10257,7 @@ pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 { +pub fn _knot_mask32(a: __mmask32) -> __mmask32 { !a } @@ -9800,7 +10267,7 @@ pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 { +pub fn _knot_mask64(a: __mmask64) -> __mmask64 { !a } @@ -9810,7 +10277,7 @@ pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { +pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { _knot_mask32(a) & b } @@ -9820,7 +10287,7 @@ pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { _knot_mask64(a) & b } @@ -9830,7 +10297,7 @@ pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 
{ +pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { a | b } @@ -9840,7 +10307,7 @@ pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { a | b } @@ -9850,7 +10317,7 @@ pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { +pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { a ^ b } @@ -9860,7 +10327,7 @@ pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { a ^ b } @@ -9870,7 +10337,7 @@ pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { +pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { _knot_mask32(a ^ b) } @@ -9880,7 +10347,7 @@ pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { _knot_mask64(a ^ b) } @@ -9917,7 +10384,7 @@ pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { +pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { (_kor_mask32(a, b) == 0xffffffff) as u8 } @@ -9928,7 +10395,7 @@ pub unsafe fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { +pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8 } @@ -9939,7 +10406,7 @@ pub unsafe fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { +pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { (_kor_mask32(a, b) == 0) as u8 } @@ -9950,7 +10417,7 @@ pub unsafe fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { +pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { (_kor_mask64(a, b) == 0) as u8 } @@ -9961,7 +10428,7 @@ pub unsafe fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { #[target_feature(enable = "avx512bw")] #[rustc_legacy_const_generics(1)] #[unstable(feature = 
"stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftli_mask32(a: __mmask32) -> __mmask32 { +pub fn _kshiftli_mask32(a: __mmask32) -> __mmask32 { a << COUNT } @@ -9972,7 +10439,7 @@ pub unsafe fn _kshiftli_mask32(a: __mmask32) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftli_mask64(a: __mmask64) -> __mmask64 { +pub fn _kshiftli_mask64(a: __mmask64) -> __mmask64 { a << COUNT } @@ -9983,7 +10450,7 @@ pub unsafe fn _kshiftli_mask64(a: __mmask64) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftri_mask32(a: __mmask32) -> __mmask32 { +pub fn _kshiftri_mask32(a: __mmask32) -> __mmask32 { a >> COUNT } @@ -9994,7 +10461,7 @@ pub unsafe fn _kshiftri_mask32(a: __mmask32) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftri_mask64(a: __mmask64) -> __mmask64 { +pub fn _kshiftri_mask64(a: __mmask64) -> __mmask64 { a >> COUNT } @@ -10031,7 +10498,7 @@ pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { +pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { (_kandn_mask32(a, b) == 0) as u8 } @@ -10042,7 +10509,7 @@ pub unsafe fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { +pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { (_kandn_mask64(a, b) == 0) as u8 } @@ -10053,7 +10520,7 @@ pub unsafe fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { +pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { (_kand_mask32(a, b) == 0) as u8 } @@ -10064,7 +10531,7 @@ pub unsafe fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { +pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { (_kand_mask64(a, b) == 0) as u8 } @@ -10075,7 +10542,7 @@ pub unsafe fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd -pub unsafe fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 { +pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 { ((a & 0xffff) << 16) | (b & 0xffff) } @@ -10086,7 +10553,7 @@ pub unsafe fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq -pub unsafe fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 { +pub fn _mm512_kunpackd(a: __mmask64, b: 
__mmask64) -> __mmask64 { ((a & 0xffffffff) << 32) | (b & 0xffffffff) } @@ -10097,9 +10564,11 @@ pub unsafe fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i { - let a = a.as_i16x32(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i { + unsafe { + let a = a.as_i16x32(); + transmute::(simd_cast(a)) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10109,9 +10578,11 @@ pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { - let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, convert, src.as_i8x32())) +pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, convert, src.as_i8x32())) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10121,9 +10592,11 @@ pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { - let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); - transmute(simd_select_bitmask(k, convert, i8x32::ZERO)) +pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); + transmute(simd_select_bitmask(k, convert, i8x32::ZERO)) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -10133,9 +10606,11 @@ pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i { - let a = a.as_i16x16(); - transmute::(simd_cast(a)) +pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i { + unsafe { + let a = a.as_i16x16(); + transmute::(simd_cast(a)) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
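The mask-register helpers converted just above (`_kand_mask32`, `_knot_mask32`, `_kshiftli_mask32`, `_mm512_kunpackw`, ...) reduce to plain integer operations on `__mmask32`/`__mmask64`, so chaining them needs no `unsafe` once the caller enables `avx512bw`. An illustrative sketch; `combine_masks` and the shift count are examples only:

    #![feature(stdarch_x86_avx512)] // nightly feature gate
    use std::arch::x86_64::*;

    /// Example combination of mask-register helpers; plain integer ops underneath.
    #[target_feature(enable = "avx512bw")]
    pub fn combine_masks(a: __mmask32, b: __mmask32) -> __mmask32 {
        let both = _kand_mask32(a, b); // a & b
        let shifted = _kshiftli_mask32::<4>(both); // (a & b) << 4
        _kor_mask32(shifted, _knot_mask32(a)) // ((a & b) << 4) | !a
    }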
@@ -10145,9 +10620,11 @@ pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { - let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, convert, src.as_i8x16())) +pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, convert, src.as_i8x16())) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10157,9 +10634,11 @@ pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { - let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) +pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -10169,14 +10648,16 @@ pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { - let a = a.as_i16x8(); - let v256: i16x16 = simd_shuffle!( - a, - i16x8::ZERO, - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] - ); - transmute::(simd_cast(v256)) +pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let v256: i16x16 = simd_shuffle!( + a, + i16x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ); + transmute::(simd_cast(v256)) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10186,10 +10667,12 @@ pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi16_epi8(a).as_i8x16(); - let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; - transmute(simd_select_bitmask(k, convert, src.as_i8x16())) +pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi8(a).as_i8x16(); + let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; + transmute(simd_select_bitmask(k, convert, src.as_i8x16())) + } } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -10199,10 +10682,12 @@ pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovwb))] -pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi16_epi8(a).as_i8x16(); - let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; - transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) +pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi8(a).as_i8x16(); + let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) + } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -10212,12 +10697,14 @@ pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { - transmute(vpmovswb( - a.as_i16x32(), - i8x32::ZERO, - 0b11111111_11111111_11111111_11111111, - )) +pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { + unsafe { + transmute(vpmovswb( + a.as_i16x32(), + i8x32::ZERO, + 0b11111111_11111111_11111111_11111111, + )) + } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10227,8 +10714,8 @@ pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { - transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) +pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10238,8 +10725,8 @@ pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { - transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) +pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. 
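A sketch of the truncating 16-to-8-bit narrows above, contrasting the plain and zero-masked forms; `narrow_truncate` is a hypothetical helper and the usual nightly feature gate is assumed:

    #![feature(stdarch_x86_avx512)] // nightly feature gate
    use std::arch::x86_64::*;

    /// Truncating narrow: each 16-bit lane keeps only its low 8 bits; the
    /// zero-mask variant additionally zeroes lanes whose mask bit is clear.
    #[target_feature(enable = "avx512bw")]
    pub fn narrow_truncate(v: __m512i, keep: __mmask32) -> (__m256i, __m256i) {
        (_mm512_cvtepi16_epi8(v), _mm512_maskz_cvtepi16_epi8(keep, v))
    }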
@@ -10249,8 +10736,8 @@ pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { - transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) +pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10260,8 +10747,8 @@ pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { - transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) +pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10271,8 +10758,8 @@ pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { - transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) +pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -10282,8 +10769,8 @@ pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { - transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) +pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10293,8 +10780,8 @@ pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) +pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10304,8 +10791,8 @@ pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] -pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) +pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -10315,12 +10802,14 @@ pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { - transmute(vpmovuswb( - a.as_u16x32(), - u8x32::ZERO, - 0b11111111_11111111_11111111_11111111, - )) +pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { + unsafe { + transmute(vpmovuswb( + a.as_u16x32(), + u8x32::ZERO, + 0b11111111_11111111_11111111_11111111, + )) + } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10330,8 +10819,8 @@ pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { - transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) +pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -10341,8 +10830,8 @@ pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { - transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) +pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { + unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -10352,12 +10841,14 @@ pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { - transmute(vpmovuswb256( - a.as_u16x16(), - u8x16::ZERO, - 0b11111111_11111111, - )) +pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { + unsafe { + transmute(vpmovuswb256( + a.as_u16x16(), + u8x16::ZERO, + 0b11111111_11111111, + )) + } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10367,8 +10858,8 @@ pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { - transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) +pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10378,8 +10869,8 @@ pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { - transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) +pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { + unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. 
@@ -10389,8 +10880,8 @@ pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { - transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) +pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10400,8 +10891,8 @@ pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) +pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) } } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10411,8 +10902,8 @@ pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] -pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) +pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst. @@ -10422,9 +10913,11 @@ pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i { - let a = a.as_i8x32(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i8x32(); + transmute::(simd_cast(a)) + } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
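The saturating narrows differ from the truncating ones: `cvtsepi16_epi8` clamps signed lanes to [-128, 127] and `cvtusepi16_epi8` clamps unsigned lanes to [0, 255]. A small sketch with the 128-bit variants; `narrow_saturate` is illustrative and the nightly feature gate is assumed:

    #![feature(stdarch_x86_avx512)] // nightly feature gate
    use std::arch::x86_64::*;

    /// Saturating narrows: signed lanes clamp to [-128, 127], unsigned to [0, 255].
    /// Only the low 8 bytes of each 128-bit result carry data; the rest is zeroed.
    #[target_feature(enable = "avx512bw,avx512vl")]
    pub fn narrow_saturate(v: __m128i) -> (__m128i, __m128i) {
        (_mm_cvtsepi16_epi8(v), _mm_cvtusepi16_epi8(v))
    }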
@@ -10434,9 +10927,11 @@ pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { - let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, convert, src.as_i16x32())) +pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, src.as_i16x32())) + } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10446,9 +10941,11 @@ pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { - let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) +pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) + } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10458,9 +10955,11 @@ pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, convert, src.as_i16x16())) +pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, src.as_i16x16())) + } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10470,9 +10969,11 @@ pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) +pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) + } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10482,9 +10983,11 @@ pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi8_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, src.as_i16x8())) +pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, src.as_i16x8())) + } } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10494,9 +10997,11 @@ pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbw))] -pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi8_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) +pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst. @@ -10506,9 +11011,11 @@ pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i { - let a = a.as_u8x32(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i { + unsafe { + let a = a.as_u8x32(); + transmute::(simd_cast(a)) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10518,9 +11025,11 @@ pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i { #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { - let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, convert, src.as_i16x32())) +pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, src.as_i16x32())) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -10530,9 +11039,11 @@ pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { - let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); - transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) +pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); + transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10542,9 +11053,11 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, convert, src.as_i16x16())) +pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, src.as_i16x16())) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10554,9 +11067,11 @@ pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) +pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10566,9 +11081,11 @@ pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu8_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, src.as_i16x8())) +pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, src.as_i16x8())) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
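For the widening direction, a sketch contrasting sign extension with a write-mask against zero extension with a zero-mask; `widen` is a hypothetical helper under the same toolchain assumptions:

    #![feature(stdarch_x86_avx512)] // nightly feature gate
    use std::arch::x86_64::*;

    /// Widening 8 -> 16 bits: sign extension with a write-mask (unselected lanes
    /// copied from `src`) versus zero extension with a zero-mask (unselected lanes zeroed).
    #[target_feature(enable = "avx512bw")]
    pub fn widen(src: __m512i, k: __mmask32, bytes: __m256i) -> (__m512i, __m512i) {
        (
            _mm512_mask_cvtepi8_epi16(src, k, bytes),
            _mm512_maskz_cvtepu8_epi16(k, bytes),
        )
    }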
@@ -10578,9 +11095,11 @@ pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbw))] -pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu8_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) +pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } } /// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst. @@ -10591,89 +11110,91 @@ pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - const fn mask(shift: i32, i: u32) -> u32 { - let shift = shift as u32 & 0xff; - if shift > 15 || i % 16 < shift { - 0 - } else { - 64 + (i - shift) +pub fn _mm512_bslli_epi128(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + const fn mask(shift: i32, i: u32) -> u32 { + let shift = shift as u32 & 0xff; + if shift > 15 || i % 16 < shift { + 0 + } else { + 64 + (i - shift) + } } + let a = a.as_i8x64(); + let zero = i8x64::ZERO; + let r: i8x64 = simd_shuffle!( + zero, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + mask(IMM8, 32), + mask(IMM8, 33), + mask(IMM8, 34), + mask(IMM8, 35), + mask(IMM8, 36), + mask(IMM8, 37), + mask(IMM8, 38), + mask(IMM8, 39), + mask(IMM8, 40), + mask(IMM8, 41), + mask(IMM8, 42), + mask(IMM8, 43), + mask(IMM8, 44), + mask(IMM8, 45), + mask(IMM8, 46), + mask(IMM8, 47), + mask(IMM8, 48), + mask(IMM8, 49), + mask(IMM8, 50), + mask(IMM8, 51), + mask(IMM8, 52), + mask(IMM8, 53), + mask(IMM8, 54), + mask(IMM8, 55), + mask(IMM8, 56), + mask(IMM8, 57), + mask(IMM8, 58), + mask(IMM8, 59), + mask(IMM8, 60), + mask(IMM8, 61), + mask(IMM8, 62), + mask(IMM8, 63), + ], + ); + transmute(r) } - let a = a.as_i8x64(); - let zero = i8x64::ZERO; - let r: i8x64 = simd_shuffle!( - zero, - a, - [ - mask(IMM8, 0), - mask(IMM8, 1), - mask(IMM8, 2), - mask(IMM8, 3), - mask(IMM8, 4), - mask(IMM8, 5), - mask(IMM8, 6), - mask(IMM8, 7), - mask(IMM8, 8), - mask(IMM8, 9), - mask(IMM8, 10), - mask(IMM8, 11), - mask(IMM8, 12), - mask(IMM8, 13), - mask(IMM8, 14), - mask(IMM8, 15), - mask(IMM8, 16), - mask(IMM8, 17), - mask(IMM8, 18), - mask(IMM8, 19), - mask(IMM8, 20), - mask(IMM8, 21), - mask(IMM8, 22), - mask(IMM8, 23), - mask(IMM8, 24), - mask(IMM8, 25), - mask(IMM8, 26), - mask(IMM8, 27), - mask(IMM8, 28), - mask(IMM8, 29), - mask(IMM8, 30), - mask(IMM8, 31), - mask(IMM8, 32), - mask(IMM8, 33), - mask(IMM8, 34), - mask(IMM8, 35), - mask(IMM8, 36), - mask(IMM8, 37), - mask(IMM8, 38), - 
mask(IMM8, 39), - mask(IMM8, 40), - mask(IMM8, 41), - mask(IMM8, 42), - mask(IMM8, 43), - mask(IMM8, 44), - mask(IMM8, 45), - mask(IMM8, 46), - mask(IMM8, 47), - mask(IMM8, 48), - mask(IMM8, 49), - mask(IMM8, 50), - mask(IMM8, 51), - mask(IMM8, 52), - mask(IMM8, 53), - mask(IMM8, 54), - mask(IMM8, 55), - mask(IMM8, 56), - mask(IMM8, 57), - mask(IMM8, 58), - mask(IMM8, 59), - mask(IMM8, 60), - mask(IMM8, 61), - mask(IMM8, 62), - mask(IMM8, 63), - ], - ); - transmute(r) } /// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst. @@ -10684,171 +11205,208 @@ pub unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i8x64(); - let zero = i8x64::ZERO; - let r: i8x64 = match IMM8 % 16 { - 0 => simd_shuffle!( - a, - zero, - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - ], - ), - 1 => simd_shuffle!( - a, - zero, - [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, - ], - ), - 2 => simd_shuffle!( - a, - zero, - [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, - ], - ), - 3 => simd_shuffle!( - a, - zero, - [ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, - 114, - ], - ), - 4 => simd_shuffle!( - a, - zero, - [ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, - 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, - 115, - ], - ), - 5 => simd_shuffle!( - a, - zero, - [ - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, - 115, 116, - ], - ), - 6 => simd_shuffle!( - a, - zero, - [ - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, - 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, - 116, 117, - ], - ), - 7 => simd_shuffle!( - a, - zero, - [ - 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, - 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, - 116, 117, 118, - ], - ), - 8 => simd_shuffle!( - a, - zero, - [ - 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 
66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, - 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, - 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, - 116, 117, 118, 119, - ], - ), - 9 => simd_shuffle!( - a, - zero, - [ - 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, - 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, - 117, 118, 119, 120, - ], - ), - 10 => simd_shuffle!( - a, - zero, - [ - 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, - 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, - ], - ), - 11 => simd_shuffle!( - a, - zero, - [ - 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, - ], - ), - 12 => simd_shuffle!( - a, - zero, - [ - 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, - 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, - 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, 122, 123, - ], - ), - 13 => simd_shuffle!( - a, - zero, - [ - 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, - 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, - 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, - ], - ), - 14 => simd_shuffle!( - a, - zero, - [ - 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, - 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, - 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, - ], - ), - 15 => simd_shuffle!( - a, - zero, - [ - 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, - 121, 122, 123, 124, 125, 126, - ], - ), - _ => zero, - }; - transmute(r) +pub fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i8x64(); + let zero = i8x64::ZERO; + let r: i8x64 = match IMM8 % 16 { + 0 => { + simd_shuffle!( + a, + zero, + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, + ], + ) + } + 1 => { + simd_shuffle!( + a, + zero, + [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 112, + ], + ) + } + 2 => { + simd_shuffle!( + a, + zero, + [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, + 23, 24, 
25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 112, 113, + ], + ) + } + 3 => { + simd_shuffle!( + a, + zero, + [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 112, 113, 114, + ], + ) + } + 4 => { + simd_shuffle!( + a, + zero, + [ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 112, 113, 114, 115, + ], + ) + } + 5 => { + simd_shuffle!( + a, + zero, + [ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 112, 113, 114, 115, 116, + ], + ) + } + 6 => { + simd_shuffle!( + a, + zero, + [ + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 96, 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 112, 113, 114, 115, 116, 117, + ], + ) + } + 7 => { + simd_shuffle!( + a, + zero, + [ + 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 96, 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 112, 113, 114, 115, 116, 117, 118, + ], + ) + } + 8 => { + simd_shuffle!( + a, + zero, + [ + 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, + 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, + 46, 47, 96, 97, 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, + 112, 113, 114, 115, 116, 117, 118, 119, + ], + ) + } + 9 => { + simd_shuffle!( + a, + zero, + [ + 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, + 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, + 47, 96, 97, 98, 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, + 112, 113, 114, 115, 116, 117, 118, 119, 120, + ], + ) + } + 10 => { + simd_shuffle!( + a, + zero, + [ + 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, + 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, + ], + ) + } + 11 => { + simd_shuffle!( + a, + zero, + [ + 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, + 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, + ], + ) + } + 12 => { + simd_shuffle!( + a, + zero, + [ + 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, + 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, + ], + ) + } + 13 => { + simd_shuffle!( + a, + zero, + [ + 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 
71, 72, 73, 74, 75, 76, 29, 30, 31, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, + ], + ) + } + 14 => { + simd_shuffle!( + a, + zero, + [ + 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + ], + ) + } + 15 => { + simd_shuffle!( + a, + zero, + [ + 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + ], + ) + } + _ => zero, + }; + transmute(r) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst. @@ -10861,187 +11419,222 @@ pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m512i { - // If palignr is shifting the pair of vectors more than the size of two - // lanes, emit zero. - if IMM8 >= 32 { - return _mm512_setzero_si512(); - } - // If palignr is shifting the pair of input vectors more than one lane, - // but less than two lanes, convert to shifting in zeroes. - let (a, b) = if IMM8 > 16 { - (_mm512_setzero_si512(), a) - } else { - (a, b) - }; - let a = a.as_i8x64(); - let b = b.as_i8x64(); - - if IMM8 == 16 { - return transmute(a); - } - - let r: i8x64 = match IMM8 % 16 { - 0 => simd_shuffle!( - b, - a, - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - ], - ), - 1 => simd_shuffle!( - b, - a, - [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, - ], - ), - 2 => simd_shuffle!( - b, - a, - [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, - ], - ), - 3 => simd_shuffle!( - b, - a, - [ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, - 114, - ], - ), - 4 => simd_shuffle!( - b, - a, - [ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, - 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, - 115, - ], - ), - 5 => simd_shuffle!( - b, - a, - [ - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 
66, 67, 68, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, - 115, 116, - ], - ), - 6 => simd_shuffle!( - b, - a, - [ - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, - 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, - 116, 117, - ], - ), - 7 => simd_shuffle!( - b, - a, - [ - 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, - 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, - 116, 117, 118, - ], - ), - 8 => simd_shuffle!( - b, - a, - [ - 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, - 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, - 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, - 116, 117, 118, 119, - ], - ), - 9 => simd_shuffle!( - b, - a, - [ - 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, - 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, - 117, 118, 119, 120, - ], - ), - 10 => simd_shuffle!( - b, - a, - [ - 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, - 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, - ], - ), - 11 => simd_shuffle!( - b, - a, - [ - 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, - ], - ), - 12 => simd_shuffle!( - b, - a, - [ - 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, - 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, - 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, 122, 123, - ], - ), - 13 => simd_shuffle!( - b, - a, - [ - 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, - 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, - 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, - ], - ), - 14 => simd_shuffle!( - b, - a, - [ - 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, - 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, - 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, - ], - ), - 15 => simd_shuffle!( - b, - a, - [ - 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, - 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, - 121, 122, 123, 124, 125, 126, - ], - ), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn 
_mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { + // If palignr is shifting the pair of vectors more than the size of two + // lanes, emit zero. + if IMM8 >= 32 { + return _mm512_setzero_si512(); + } + // If palignr is shifting the pair of input vectors more than one lane, + // but less than two lanes, convert to shifting in zeroes. + let (a, b) = if IMM8 > 16 { + (_mm512_setzero_si512(), a) + } else { + (a, b) + }; + let a = a.as_i8x64(); + let b = b.as_i8x64(); + if IMM8 == 16 { + return transmute(a); + } + let r: i8x64 = match IMM8 % 16 { + 0 => { + simd_shuffle!( + b, + a, + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, + ], + ) + } + 1 => { + simd_shuffle!( + b, + a, + [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 112, + ], + ) + } + 2 => { + simd_shuffle!( + b, + a, + [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 112, 113, + ], + ) + } + 3 => { + simd_shuffle!( + b, + a, + [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 112, 113, 114, + ], + ) + } + 4 => { + simd_shuffle!( + b, + a, + [ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 112, 113, 114, 115, + ], + ) + } + 5 => { + simd_shuffle!( + b, + a, + [ + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 112, 113, 114, 115, 116, + ], + ) + } + 6 => { + simd_shuffle!( + b, + a, + [ + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 96, 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 112, 113, 114, 115, 116, 117, + ], + ) + } + 7 => { + simd_shuffle!( + b, + a, + [ + 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 96, 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 112, 113, 114, 115, 116, 117, 118, + ], + ) + } + 8 => { + simd_shuffle!( + b, + a, + [ + 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, + 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, + 46, 47, 96, 97, 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, + 112, 113, 114, 115, 116, 117, 118, 119, + ], + ) + } + 9 => { + simd_shuffle!( + b, + a, + [ + 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, + 28, 29, 30, 31, 80, 81, 
82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, + 47, 96, 97, 98, 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, + 112, 113, 114, 115, 116, 117, 118, 119, 120, + ], + ) + } + 10 => { + simd_shuffle!( + b, + a, + [ + 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, + 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, + ], + ) + } + 11 => { + simd_shuffle!( + b, + a, + [ + 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, + 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, + ], + ) + } + 12 => { + simd_shuffle!( + b, + a, + [ + 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, + 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, + 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, + ], + ) + } + 13 => { + simd_shuffle!( + b, + a, + [ + 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, + ], + ) + } + 14 => { + simd_shuffle!( + b, + a, + [ + 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + ], + ) + } + 15 => { + simd_shuffle!( + b, + a, + [ + 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + ], + ) + } + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11052,15 +11645,17 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_alignr_epi8( +pub fn _mm512_mask_alignr_epi8( src: __m512i, k: __mmask64, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_alignr_epi8::(a, b); - transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi8::(a, b); + transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64())) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
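// Illustrative usage sketch (hypothetical caller code, not part of this patch):
// once the intrinsic is a safe `fn`, a caller that itself enables the required
// target feature can invoke it without an `unsafe` block (on a nightly toolchain,
// since these AVX-512 intrinsics are still unstable). Runtime dispatch, e.g. via
// `is_x86_feature_detected!("avx512bw")`, remains the caller's responsibility,
// and the byte offset is supplied as the const generic IMM8.
use core::arch::x86_64::{__m512i, _mm512_alignr_epi8};

#[target_feature(enable = "avx512bw")]
fn alignr_by_4(a: __m512i, b: __m512i) -> __m512i {
    // Per 128-bit lane: concatenate the lane of `b` (low) with the lane of `a`
    // (high), shift the 32-byte pair right by 4 bytes, and keep the low 16 bytes.
    _mm512_alignr_epi8::<4>(a, b)
}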
@@ -11071,14 +11666,12 @@ pub unsafe fn _mm512_mask_alignr_epi8<const IMM8: i32>( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>( - k: __mmask64, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_alignr_epi8::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO)) +pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi8::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO)) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11089,15 +11682,17 @@ pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(4)] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] -pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>( +pub fn _mm256_mask_alignr_epi8<const IMM8: i32>( src: __m256i, k: __mmask32, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm256_alignr_epi8::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi8::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32())) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11108,14 +11703,12 @@ pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] -pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>( - k: __mmask32, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm256_alignr_epi8::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO)) +pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi8::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO)) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
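// Sketch of the writemask/zeromask distinction implemented above through
// `simd_select_bitmask` (hypothetical caller code, not part of this patch):
// for every mask bit that is 0, the `mask_` form keeps the byte from `src`,
// while the `maskz_` form writes 0.
use core::arch::x86_64::{__m128i, __mmask16, _mm_mask_alignr_epi8, _mm_maskz_alignr_epi8};

#[target_feature(enable = "avx512bw,avx512vl")]
fn masked_vs_zeroed(src: __m128i, a: __m128i, b: __m128i) -> (__m128i, __m128i) {
    let k: __mmask16 = 0b0000_1111_0000_1111; // select only these byte lanes
    (
        _mm_mask_alignr_epi8::<2>(src, k, a, b), // unselected bytes come from `src`
        _mm_maskz_alignr_epi8::<2>(k, a, b),     // unselected bytes become 0
    )
}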
@@ -11126,15 +11719,17 @@ pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(4)] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] -pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>( +pub fn _mm_mask_alignr_epi8<const IMM8: i32>( src: __m128i, k: __mmask16, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm_alignr_epi8::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi8::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16())) + } } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11145,14 +11740,12 @@ pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] -pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>( - k: __mmask16, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm_alignr_epi8::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO)) +pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi8::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO)) + } } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. diff --git a/crates/core_arch/src/x86/avx512cd.rs b/crates/core_arch/src/x86/avx512cd.rs index 71eceab6bd..3982c55fa6 100644 --- a/crates/core_arch/src/x86/avx512cd.rs +++ b/crates/core_arch/src/x86/avx512cd.rs @@ -11,7 +11,7 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d -pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i { +pub fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i { _mm512_set1_epi32(k as i32) } @@ -22,7 +22,7 @@ pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d -pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i { +pub fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i { _mm256_set1_epi32(k as i32) } @@ -33,7 +33,7 @@ pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d -pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i { +pub fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i { _mm_set1_epi32(k as i32) } @@ -44,7 +44,7 @@ pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q -pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i { +pub fn
_mm512_broadcastmb_epi64(k: __mmask8) -> __m512i { _mm512_set1_epi64(k as i64) } @@ -55,7 +55,7 @@ pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q -pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i { +pub fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i { _mm256_set1_epi64x(k as i64) } @@ -66,7 +66,7 @@ pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q -pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i { +pub fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i { _mm_set1_epi64x(k as i64) } @@ -77,8 +77,8 @@ pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i { - transmute(vpconflictd(a.as_i32x16())) +pub fn _mm512_conflict_epi32(a: __m512i) -> __m512i { + unsafe { transmute(vpconflictd(a.as_i32x16())) } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -88,9 +88,11 @@ pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - let conflict = _mm512_conflict_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, conflict, src.as_i32x16())) +pub fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, conflict, src.as_i32x16())) + } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -100,9 +102,11 @@ pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i { - let conflict = _mm512_conflict_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, conflict, i32x16::ZERO)) +pub fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, conflict, i32x16::ZERO)) + } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. 
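// Illustrative use of the conflict-detection intrinsic (hypothetical caller
// code, not part of this patch): vpconflictd gives each 32-bit lane a bit
// vector of the lower-indexed lanes holding the same value, so a non-zero
// result marks a lane that repeats an earlier index, which is the usual check
// before a scatter with possibly duplicated indices.
use core::arch::x86_64::{
    __m512i, __mmask16, _mm512_cmpneq_epi32_mask, _mm512_conflict_epi32, _mm512_setzero_si512,
};

#[target_feature(enable = "avx512cd,avx512f")]
fn lanes_with_earlier_duplicate(indices: __m512i) -> __mmask16 {
    let conflicts = _mm512_conflict_epi32(indices);
    // Bit i of the returned mask is set iff lane i equals some lane j < i.
    _mm512_cmpneq_epi32_mask(conflicts, _mm512_setzero_si512())
}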
@@ -112,8 +116,8 @@ pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i { - transmute(vpconflictd256(a.as_i32x8())) +pub fn _mm256_conflict_epi32(a: __m256i) -> __m256i { + unsafe { transmute(vpconflictd256(a.as_i32x8())) } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -123,9 +127,11 @@ pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let conflict = _mm256_conflict_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, conflict, src.as_i32x8())) +pub fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, conflict, src.as_i32x8())) + } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -135,9 +141,11 @@ pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i { - let conflict = _mm256_conflict_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, conflict, i32x8::ZERO)) +pub fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, conflict, i32x8::ZERO)) + } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -147,8 +155,8 @@ pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i { - transmute(vpconflictd128(a.as_i32x4())) +pub fn _mm_conflict_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpconflictd128(a.as_i32x4())) } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
@@ -158,9 +166,11 @@ pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let conflict = _mm_conflict_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, conflict, src.as_i32x4())) +pub fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, conflict, src.as_i32x4())) + } } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -170,9 +180,11 @@ pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictd))] -pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i { - let conflict = _mm_conflict_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, conflict, i32x4::ZERO)) +pub fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, conflict, i32x4::ZERO)) + } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -182,8 +194,8 @@ pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i { - transmute(vpconflictq(a.as_i64x8())) +pub fn _mm512_conflict_epi64(a: __m512i) -> __m512i { + unsafe { transmute(vpconflictq(a.as_i64x8())) } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -193,9 +205,11 @@ pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - let conflict = _mm512_conflict_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, conflict, src.as_i64x8())) +pub fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, conflict, src.as_i64x8())) + } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. 
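// Worked example of the 64-bit conflict semantics (hypothetical test-style
// code, not part of this patch): for the four lanes [1, 1, 2, 1], lane 1
// matches lane 0 (bit 0), lane 3 matches lanes 0 and 1 (bits 0 and 1), and the
// remaining lanes have no earlier duplicate, so the expected result is
// [0, 0b01, 0, 0b11].
use core::arch::x86_64::{_mm256_cmpeq_epi64_mask, _mm256_conflict_epi64, _mm256_setr_epi64x};

#[target_feature(enable = "avx512cd,avx512vl,avx512f,avx")]
fn conflict_epi64_example() -> bool {
    let a = _mm256_setr_epi64x(1, 1, 2, 1);
    let expected = _mm256_setr_epi64x(0, 0b01, 0, 0b11);
    _mm256_cmpeq_epi64_mask(_mm256_conflict_epi64(a), expected) == 0b1111
}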
@@ -205,9 +219,11 @@ pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i { - let conflict = _mm512_conflict_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, conflict, i64x8::ZERO)) +pub fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let conflict = _mm512_conflict_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, conflict, i64x8::ZERO)) + } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -217,8 +233,8 @@ pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i { - transmute(vpconflictq256(a.as_i64x4())) +pub fn _mm256_conflict_epi64(a: __m256i) -> __m256i { + unsafe { transmute(vpconflictq256(a.as_i64x4())) } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -228,9 +244,11 @@ pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let conflict = _mm256_conflict_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, conflict, src.as_i64x4())) +pub fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, conflict, src.as_i64x4())) + } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -240,9 +258,11 @@ pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i { - let conflict = _mm256_conflict_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, conflict, i64x4::ZERO)) +pub fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let conflict = _mm256_conflict_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, conflict, i64x4::ZERO)) + } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. 
@@ -252,8 +272,8 @@ pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i { - transmute(vpconflictq128(a.as_i64x2())) +pub fn _mm_conflict_epi64(a: __m128i) -> __m128i { + unsafe { transmute(vpconflictq128(a.as_i64x2())) } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -263,9 +283,11 @@ pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let conflict = _mm_conflict_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, conflict, src.as_i64x2())) +pub fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, conflict, src.as_i64x2())) + } } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. @@ -275,9 +297,11 @@ pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpconflictq))] -pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i { - let conflict = _mm_conflict_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, conflict, i64x2::ZERO)) +pub fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let conflict = _mm_conflict_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, conflict, i64x2::ZERO)) + } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. @@ -287,8 +311,8 @@ pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i { - transmute(simd_ctlz(a.as_i32x16())) +pub fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctlz(a.as_i32x16())) } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
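// Usage sketch (hypothetical caller code, not part of this patch): because
// vplzcntd reports 32 for a zero lane, `31 - lzcnt(x)` yields floor(log2(x))
// for non-zero lanes (and -1 for zero lanes), entirely in safe code inside an
// avx512cd-enabled function.
use core::arch::x86_64::{__m512i, _mm512_lzcnt_epi32, _mm512_set1_epi32, _mm512_sub_epi32};

#[target_feature(enable = "avx512cd,avx512f")]
fn floor_log2_epi32(v: __m512i) -> __m512i {
    _mm512_sub_epi32(_mm512_set1_epi32(31), _mm512_lzcnt_epi32(v))
}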
@@ -298,9 +322,11 @@ pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, zerocount, src.as_i32x16())) +pub fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, zerocount, src.as_i32x16())) + } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -310,9 +336,11 @@ pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { - let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO)) +pub fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO)) + } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. @@ -322,8 +350,8 @@ pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i { - transmute(simd_ctlz(a.as_i32x8())) +pub fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctlz(a.as_i32x8())) } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -333,9 +361,11 @@ pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, zerocount, src.as_i32x8())) +pub fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, zerocount, src.as_i32x8())) + } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -345,9 +375,11 @@ pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { - let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO)) +pub fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO)) + } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. @@ -357,8 +389,8 @@ pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i { - transmute(simd_ctlz(a.as_i32x4())) +pub fn _mm_lzcnt_epi32(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctlz(a.as_i32x4())) } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -368,9 +400,11 @@ pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, zerocount, src.as_i32x4())) +pub fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, zerocount, src.as_i32x4())) + } } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -380,9 +414,11 @@ pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntd))] -pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { - let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO)) +pub fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO)) + } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. 
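// Test-style sketch (hypothetical, not part of this patch): every lane of the
// vector count should agree with the scalar `u64::leading_zeros`, a cheap
// sanity check of the `simd_ctlz`-based implementation of these intrinsics.
use core::arch::x86_64::{_mm512_cmpeq_epi64_mask, _mm512_lzcnt_epi64, _mm512_set1_epi64};

#[target_feature(enable = "avx512cd,avx512f")]
fn lzcnt_epi64_matches_scalar(x: u64) -> bool {
    let lz = _mm512_lzcnt_epi64(_mm512_set1_epi64(x as i64));
    let expected = _mm512_set1_epi64(x.leading_zeros() as i64);
    _mm512_cmpeq_epi64_mask(lz, expected) == 0xff
}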
@@ -392,8 +428,8 @@ pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i { - transmute(simd_ctlz(a.as_i64x8())) +pub fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctlz(a.as_i64x8())) } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -403,9 +439,11 @@ pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, zerocount, src.as_i64x8())) +pub fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, zerocount, src.as_i64x8())) + } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -415,9 +453,11 @@ pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512cd")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { - let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO)) +pub fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO)) + } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. @@ -427,8 +467,8 @@ pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i { - transmute(simd_ctlz(a.as_i64x4())) +pub fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctlz(a.as_i64x4())) } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -438,9 +478,11 @@ pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, zerocount, src.as_i64x4())) +pub fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, zerocount, src.as_i64x4())) + } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -450,9 +492,11 @@ pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { - let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO)) +pub fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO)) + } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. @@ -462,8 +506,8 @@ pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i { - transmute(simd_ctlz(a.as_i64x2())) +pub fn _mm_lzcnt_epi64(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctlz(a.as_i64x2())) } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -473,9 +517,11 @@ pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, zerocount, src.as_i64x2())) +pub fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, zerocount, src.as_i64x2())) + } } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -485,9 +531,11 @@ pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512cd,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vplzcntq))] -pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { - let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO)) +pub fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO)) + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512dq.rs b/crates/core_arch/src/x86/avx512dq.rs index 66d0feebb6..20ae01b393 100644 --- a/crates/core_arch/src/x86/avx512dq.rs +++ b/crates/core_arch/src/x86/avx512dq.rs @@ -15,9 +15,11 @@ use crate::{ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let and = _mm_and_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, and, src.as_f64x2())) +pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let and = _mm_and_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, and, src.as_f64x2())) + } } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and @@ -28,9 +30,11 @@ pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let and = _mm_and_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, and, f64x2::ZERO)) +pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let and = _mm_and_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, and, f64x2::ZERO)) + } } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b @@ -42,9 +46,11 @@ pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let and = _mm256_and_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, and, src.as_f64x4())) +pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let and = _mm256_and_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, and, src.as_f64x4())) + } } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and @@ -55,9 +61,11 @@ pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let and = _mm256_and_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, and, f64x4::ZERO)) +pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let and = 
_mm256_and_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, and, f64x4::ZERO)) + } } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b @@ -68,8 +76,8 @@ pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) +pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b @@ -81,9 +89,11 @@ pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let and = _mm512_and_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, and, src.as_f64x8())) +pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let and = _mm512_and_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, and, src.as_f64x8())) + } } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and @@ -94,9 +104,11 @@ pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let and = _mm512_and_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, and, f64x8::ZERO)) +pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let and = _mm512_and_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, and, f64x8::ZERO)) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -108,9 +120,11 @@ pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let and = _mm_and_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, and, src.as_f32x4())) +pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let and = _mm_and_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, and, src.as_f32x4())) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and @@ -121,9 +135,11 @@ pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let and = _mm_and_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, and, f32x4::ZERO)) +pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let and = _mm_and_ps(a, 
b).as_f32x4(); + transmute(simd_select_bitmask(k, and, f32x4::ZERO)) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -135,9 +151,11 @@ pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let and = _mm256_and_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, and, src.as_f32x8())) +pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let and = _mm256_and_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, and, src.as_f32x8())) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and @@ -148,9 +166,11 @@ pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let and = _mm256_and_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, and, f32x8::ZERO)) +pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let and = _mm256_and_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, and, f32x8::ZERO)) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -161,11 +181,13 @@ pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_and( - transmute::<_, u32x16>(a), - transmute::<_, u32x16>(b), - )) +pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(simd_and( + transmute::<_, u32x16>(a), + transmute::<_, u32x16>(b), + )) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -177,9 +199,11 @@ pub unsafe fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let and = _mm512_and_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, and, src.as_f32x16())) +pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let and = _mm512_and_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, and, src.as_f32x16())) + } } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and @@ -190,9 +214,11 @@ pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let and = _mm512_and_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, and, f32x16::ZERO)) +pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let and = 
_mm512_and_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, and, f32x16::ZERO)) + } } // Andnot @@ -206,9 +232,11 @@ pub unsafe fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let andnot = _mm_andnot_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, andnot, src.as_f64x2())) +pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let andnot = _mm_andnot_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, andnot, src.as_f64x2())) + } } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -220,9 +248,11 @@ pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m12 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let andnot = _mm_andnot_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, andnot, f64x2::ZERO)) +pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let andnot = _mm_andnot_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, andnot, f64x2::ZERO)) + } } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -234,9 +264,11 @@ pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let andnot = _mm256_andnot_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, andnot, src.as_f64x4())) +pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let andnot = _mm256_andnot_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, andnot, src.as_f64x4())) + } } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -248,9 +280,11 @@ pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let andnot = _mm256_andnot_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, andnot, f64x4::ZERO)) +pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let andnot = _mm256_andnot_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, andnot, f64x4::ZERO)) + } } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -261,8 +295,8 @@ pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { - _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) +pub fn 
_mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) } } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -274,9 +308,11 @@ pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let andnot = _mm512_andnot_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, andnot, src.as_f64x8())) +pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let andnot = _mm512_andnot_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, andnot, src.as_f64x8())) + } } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -288,9 +324,11 @@ pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __ #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let andnot = _mm512_andnot_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, andnot, f64x8::ZERO)) +pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let andnot = _mm512_andnot_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, andnot, f64x8::ZERO)) + } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -302,9 +340,11 @@ pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let andnot = _mm_andnot_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, andnot, src.as_f32x4())) +pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let andnot = _mm_andnot_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, andnot, src.as_f32x4())) + } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -316,9 +356,11 @@ pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let andnot = _mm_andnot_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, andnot, f32x4::ZERO)) +pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let andnot = _mm_andnot_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, andnot, f32x4::ZERO)) + } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -330,9 +372,11 @@ pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_andnot_ps(src: __m256, 
k: __mmask8, a: __m256, b: __m256) -> __m256 { - let andnot = _mm256_andnot_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, andnot, src.as_f32x8())) +pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let andnot = _mm256_andnot_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, andnot, src.as_f32x8())) + } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -344,9 +388,11 @@ pub unsafe fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m2 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let andnot = _mm256_andnot_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, andnot, f32x8::ZERO)) +pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let andnot = _mm256_andnot_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, andnot, f32x8::ZERO)) + } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -357,8 +403,8 @@ pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m25 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { - _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) +pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { + unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -370,9 +416,11 @@ pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let andnot = _mm512_andnot_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, andnot, src.as_f32x16())) +pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let andnot = _mm512_andnot_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, andnot, src.as_f32x16())) + } } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -384,9 +432,11 @@ pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let andnot = _mm512_andnot_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, andnot, f32x16::ZERO)) +pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let andnot = _mm512_andnot_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, andnot, f32x16::ZERO)) + } } // Or @@ -400,9 +450,11 @@ pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m5 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_or_pd(src: __m128d, k: 
__mmask8, a: __m128d, b: __m128d) -> __m128d { - let or = _mm_or_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, or, src.as_f64x2())) +pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let or = _mm_or_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, or, src.as_f64x2())) + } } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and @@ -413,9 +465,11 @@ pub unsafe fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let or = _mm_or_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, or, f64x2::ZERO)) +pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let or = _mm_or_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, or, f64x2::ZERO)) + } } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b @@ -427,9 +481,11 @@ pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let or = _mm256_or_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, or, src.as_f64x4())) +pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let or = _mm256_or_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, or, src.as_f64x4())) + } } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and @@ -440,9 +496,11 @@ pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let or = _mm256_or_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, or, f64x4::ZERO)) +pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let or = _mm256_or_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, or, f64x4::ZERO)) + } } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b @@ -453,8 +511,8 @@ pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) +pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and @@ -466,9 +524,11 @@ pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d 
{ - let or = _mm512_or_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, or, src.as_f64x8())) +pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let or = _mm512_or_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, or, src.as_f64x8())) + } } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and @@ -479,9 +539,11 @@ pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let or = _mm512_or_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, or, f64x8::ZERO)) +pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let or = _mm512_or_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, or, f64x8::ZERO)) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b @@ -493,9 +555,11 @@ pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let or = _mm_or_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, or, src.as_f32x4())) +pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let or = _mm_or_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, or, src.as_f32x4())) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and @@ -506,9 +570,11 @@ pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let or = _mm_or_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, or, f32x4::ZERO)) +pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let or = _mm_or_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, or, f32x4::ZERO)) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b @@ -520,9 +586,11 @@ pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let or = _mm256_or_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, or, src.as_f32x8())) +pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let or = _mm256_or_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, or, src.as_f32x8())) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and @@ -533,9 +601,11 @@ pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn 
_mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let or = _mm256_or_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, or, f32x8::ZERO)) +pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let or = _mm256_or_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, or, f32x8::ZERO)) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b @@ -546,11 +616,13 @@ pub unsafe fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_or( - transmute::<_, u32x16>(a), - transmute::<_, u32x16>(b), - )) +pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(simd_or( + transmute::<_, u32x16>(a), + transmute::<_, u32x16>(b), + )) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and @@ -562,9 +634,11 @@ pub unsafe fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let or = _mm512_or_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, or, src.as_f32x16())) +pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let or = _mm512_or_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, or, src.as_f32x16())) + } } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and @@ -575,9 +649,11 @@ pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let or = _mm512_or_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, or, f32x16::ZERO)) +pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let or = _mm512_or_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, or, f32x16::ZERO)) + } } // Xor @@ -591,9 +667,11 @@ pub unsafe fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let xor = _mm_xor_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, xor, src.as_f64x2())) +pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let xor = _mm_xor_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, xor, src.as_f64x2())) + } } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and @@ -604,9 +682,11 @@ pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let xor = _mm_xor_pd(a, b).as_f64x2(); - 
transmute(simd_select_bitmask(k, xor, f64x2::ZERO)) +pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let xor = _mm_xor_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, xor, f64x2::ZERO)) + } } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b @@ -618,9 +698,11 @@ pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let xor = _mm256_xor_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, xor, src.as_f64x4())) +pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let xor = _mm256_xor_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, xor, src.as_f64x4())) + } } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and @@ -631,9 +713,11 @@ pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let xor = _mm256_xor_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, xor, f64x4::ZERO)) +pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let xor = _mm256_xor_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, xor, f64x4::ZERO)) + } } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b @@ -644,8 +728,8 @@ pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) +pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and @@ -657,9 +741,11 @@ pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let xor = _mm512_xor_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, xor, src.as_f64x8())) +pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let xor = _mm512_xor_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, xor, src.as_f64x8())) + } } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and @@ -670,9 +756,11 @@ pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorpd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let xor = _mm512_xor_pd(a, 
b).as_f64x8(); - transmute(simd_select_bitmask(k, xor, f64x8::ZERO)) +pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let xor = _mm512_xor_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, xor, f64x8::ZERO)) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b @@ -684,9 +772,11 @@ pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let xor = _mm_xor_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, xor, src.as_f32x4())) +pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let xor = _mm_xor_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, xor, src.as_f32x4())) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and @@ -697,9 +787,11 @@ pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let xor = _mm_xor_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, xor, f32x4::ZERO)) +pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let xor = _mm_xor_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, xor, f32x4::ZERO)) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b @@ -711,9 +803,11 @@ pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let xor = _mm256_xor_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, xor, src.as_f32x8())) +pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let xor = _mm256_xor_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, xor, src.as_f32x8())) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and @@ -724,9 +818,11 @@ pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let xor = _mm256_xor_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, xor, f32x8::ZERO)) +pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let xor = _mm256_xor_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, xor, f32x8::ZERO)) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b @@ -737,11 +833,13 @@ pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn 
_mm512_xor_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_xor( - transmute::<_, u32x16>(a), - transmute::<_, u32x16>(b), - )) +pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(simd_xor( + transmute::<_, u32x16>(a), + transmute::<_, u32x16>(b), + )) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and @@ -753,9 +851,11 @@ pub unsafe fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let xor = _mm512_xor_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, xor, src.as_f32x16())) +pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let xor = _mm512_xor_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, xor, src.as_f32x16())) + } } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and @@ -766,9 +866,11 @@ pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let xor = _mm512_xor_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, xor, f32x16::ZERO)) +pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let xor = _mm512_xor_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, xor, f32x16::ZERO)) + } } // Broadcast @@ -780,9 +882,11 @@ pub unsafe fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_broadcast_f32x2(a: __m128) -> __m256 { - let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); - transmute(b) +pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 { + unsafe { + let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all @@ -793,9 +897,11 @@ pub unsafe fn _mm256_broadcast_f32x2(a: __m128) -> __m256 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vbroadcastf32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 { - let b = _mm256_broadcast_f32x2(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, src.as_f32x8())) +pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 { + unsafe { + let b = _mm256_broadcast_f32x2(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } } /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all @@ -806,9 +912,11 @@ pub unsafe fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) - #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vbroadcastf32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { - let b = _mm256_broadcast_f32x2(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, f32x8::ZERO)) +pub fn 
_mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { + unsafe { + let b = _mm256_broadcast_f32x2(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) + } } /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all @@ -818,9 +926,11 @@ pub unsafe fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_f32x2(a: __m128) -> __m512 { - let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); - transmute(b) +pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 { + unsafe { + let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all @@ -831,9 +941,11 @@ pub unsafe fn _mm512_broadcast_f32x2(a: __m128) -> __m512 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vbroadcastf32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 { - let b = _mm512_broadcast_f32x2(a).as_f32x16(); - transmute(simd_select_bitmask(k, b, src.as_f32x16())) +pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x2(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, src.as_f32x16())) + } } /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all @@ -844,9 +956,11 @@ pub unsafe fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vbroadcastf32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { - let b = _mm512_broadcast_f32x2(a).as_f32x16(); - transmute(simd_select_bitmask(k, b, f32x16::ZERO)) +pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x2(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, f32x16::ZERO)) + } } /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all @@ -856,9 +970,11 @@ pub unsafe fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_f32x8(a: __m256) -> __m512 { - let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); - transmute(b) +pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 { + unsafe { + let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]); + transmute(b) + } } /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all @@ -868,9 +984,11 @@ pub unsafe fn _mm512_broadcast_f32x8(a: __m256) -> __m512 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 { - let b = _mm512_broadcast_f32x8(a).as_f32x16(); - transmute(simd_select_bitmask(k, b, src.as_f32x16())) +pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 { + unsafe { + let b = 
_mm512_broadcast_f32x8(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, src.as_f32x16())) + } } /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all @@ -880,9 +998,11 @@ pub unsafe fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { - let b = _mm512_broadcast_f32x8(a).as_f32x16(); - transmute(simd_select_bitmask(k, b, f32x16::ZERO)) +pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { + unsafe { + let b = _mm512_broadcast_f32x8(a).as_f32x16(); + transmute(simd_select_bitmask(k, b, f32x16::ZERO)) + } } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -892,9 +1012,11 @@ pub unsafe fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d { - let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); - transmute(b) +pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d { + unsafe { + let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -904,9 +1026,11 @@ pub unsafe fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { - let b = _mm256_broadcast_f64x2(a).as_f64x4(); - transmute(simd_select_bitmask(k, b, src.as_f64x4())) +pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let b = _mm256_broadcast_f64x2(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, src.as_f64x4())) + } } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -916,9 +1040,11 @@ pub unsafe fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { - let b = _mm256_broadcast_f64x2(a).as_f64x4(); - transmute(simd_select_bitmask(k, b, f64x4::ZERO)) +pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let b = _mm256_broadcast_f64x2(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) + } } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -928,9 +1054,11 @@ pub unsafe fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { - let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); - transmute(b) +pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { + unsafe { + let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -940,9 +1068,11 @@ pub unsafe fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d { #[inline] 
#[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { - let b = _mm512_broadcast_f64x2(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, src.as_f64x8())) +pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let b = _mm512_broadcast_f64x2(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -952,9 +1082,11 @@ pub unsafe fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { - let b = _mm512_broadcast_f64x2(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, f64x8::ZERO)) +pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let b = _mm512_broadcast_f64x2(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. @@ -963,10 +1095,12 @@ pub unsafe fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { - let a = a.as_i32x4(); - let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); - transmute(b) +pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k @@ -977,9 +1111,11 @@ pub unsafe fn _mm_broadcast_i32x2(a: __m128i) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vbroadcasti32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let b = _mm_broadcast_i32x2(a).as_i32x4(); - transmute(simd_select_bitmask(k, b, src.as_i32x4())) +pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let b = _mm_broadcast_i32x2(a).as_i32x4(); + transmute(simd_select_bitmask(k, b, src.as_i32x4())) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k @@ -990,9 +1126,11 @@ pub unsafe fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vbroadcasti32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { - let b = _mm_broadcast_i32x2(a).as_i32x4(); - transmute(simd_select_bitmask(k, b, i32x4::ZERO)) +pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let b = _mm_broadcast_i32x2(a).as_i32x4(); + transmute(simd_select_bitmask(k, b, i32x4::ZERO)) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. 
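For orientation, the broadcast hunks above all lower to the same two steps: a `simd_shuffle!` that repeats the low elements, followed by `simd_select_bitmask` against either `src` or zero. A minimal caller-side sketch of the writemask form, again assuming a nightly toolchain with `stdarch_x86_avx512`; `broadcast_i32x2_demo` is a hypothetical name, not part of the patch:

```rust
#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn broadcast_i32x2_demo() -> [i32; 4] {
    let src = _mm_set1_epi32(-1);       // fallback values for unselected lanes
    let a = _mm_setr_epi32(7, 9, 0, 0); // only the lower two elements are used
    // The broadcast pattern is [a0, a1, a0, a1]; writemask 0b0101 keeps lanes 0 and 2,
    // while lanes 1 and 3 are copied through from `src`.
    let r = _mm_mask_broadcast_i32x2(src, 0b0101, a);
    let mut out = [0i32; 4];
    // Stores take raw pointers and remain `unsafe`.
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r) };
    out // [7, -1, 7, -1]
}
```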
@@ -1001,10 +1139,12 @@ pub unsafe fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { - let a = a.as_i32x4(); - let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); - transmute(b) +pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i32x4(); + let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k @@ -1015,9 +1155,11 @@ pub unsafe fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vbroadcasti32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let b = _mm256_broadcast_i32x2(a).as_i32x8(); - transmute(simd_select_bitmask(k, b, src.as_i32x8())) +pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let b = _mm256_broadcast_i32x2(a).as_i32x8(); + transmute(simd_select_bitmask(k, b, src.as_i32x8())) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k @@ -1028,9 +1170,11 @@ pub unsafe fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vbroadcasti32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { - let b = _mm256_broadcast_i32x2(a).as_i32x8(); - transmute(simd_select_bitmask(k, b, i32x8::ZERO)) +pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let b = _mm256_broadcast_i32x2(a).as_i32x8(); + transmute(simd_select_bitmask(k, b, i32x8::ZERO)) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. 
@@ -1039,10 +1183,12 @@ pub unsafe fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { - let a = a.as_i32x4(); - let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); - transmute(b) +pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i32x4(); + let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k @@ -1053,9 +1199,11 @@ pub unsafe fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vbroadcasti32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { - let b = _mm512_broadcast_i32x2(a).as_i32x16(); - transmute(simd_select_bitmask(k, b, src.as_i32x16())) +pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i32x2(a).as_i32x16(); + transmute(simd_select_bitmask(k, b, src.as_i32x16())) + } } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k @@ -1066,9 +1214,11 @@ pub unsafe fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vbroadcasti32x2))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { - let b = _mm512_broadcast_i32x2(a).as_i32x16(); - transmute(simd_select_bitmask(k, b, i32x16::ZERO)) +pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i32x2(a).as_i32x16(); + transmute(simd_select_bitmask(k, b, i32x16::ZERO)) + } } /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst. 
@@ -1077,10 +1227,12 @@ pub unsafe fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
-    let a = a.as_i32x8();
-    let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
-    transmute(b)
+pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
+    unsafe {
+        let a = a.as_i32x8();
+        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
+        transmute(b)
+    }
 }

 /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
@@ -1090,9 +1242,11 @@
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
-    let b = _mm512_broadcast_i32x8(a).as_i32x16();
-    transmute(simd_select_bitmask(k, b, src.as_i32x16()))
+pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
+    unsafe {
+        let b = _mm512_broadcast_i32x8(a).as_i32x16();
+        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
+    }
 }

 /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
@@ -1102,9 +1256,11 @@
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
-    let b = _mm512_broadcast_i32x8(a).as_i32x16();
-    transmute(simd_select_bitmask(k, b, i32x16::ZERO))
+pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
+    unsafe {
+        let b = _mm512_broadcast_i32x8(a).as_i32x16();
+        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
+    }
 }

 /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
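Every hunk in this file applies the same mechanical change: the `#[target_feature]` function drops its `unsafe` qualifier and its old body moves into an `unsafe { ... }` block, so the raw intrinsic call stays unsafe while the public signature becomes safe. A reduced sketch of that shape, assuming a toolchain that accepts safe `#[target_feature]` functions; `raw_intrinsic` and `demo_op` are made-up stand-ins, not items from this crate:

// Hypothetical stand-in for one of the extern LLVM intrinsic bindings.
#[cfg(target_arch = "x86_64")]
unsafe fn raw_intrinsic(a: i32) -> i32 {
    a.wrapping_add(1)
}

// Old shape (the `-` lines): pub unsafe fn demo_op(a: i32) -> i32 { raw_intrinsic(a) }
// New shape (the `+` lines): the public function is safe, and only the call
// into the raw intrinsic sits inside an `unsafe` block.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq")]
pub fn demo_op(a: i32) -> i32 {
    unsafe { raw_intrinsic(a) }
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    {
        if std::is_x86_feature_detected!("avx512dq") {
            // Callers that have not statically enabled the feature still need
            // `unsafe` to call a `#[target_feature]` function.
            assert_eq!(unsafe { demo_op(1) }, 2);
        }
    }
}

The net effect is that the `unsafe` obligation shrinks from every call site to the one place that actually touches the raw intrinsic, without changing what the function compiles to.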
@@ -1113,10 +1269,12 @@ pub unsafe fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
-    let a = a.as_i64x2();
-    let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
-    transmute(b)
+pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
+    unsafe {
+        let a = a.as_i64x2();
+        let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
+        transmute(b)
+    }
 }

 /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
@@ -1126,9 +1284,11 @@
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
-    let b = _mm256_broadcast_i64x2(a).as_i64x4();
-    transmute(simd_select_bitmask(k, b, src.as_i64x4()))
+pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+    unsafe {
+        let b = _mm256_broadcast_i64x2(a).as_i64x4();
+        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
+    }
 }

 /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
@@ -1138,9 +1298,11 @@
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
-    let b = _mm256_broadcast_i64x2(a).as_i64x4();
-    transmute(simd_select_bitmask(k, b, i64x4::ZERO))
+pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
+    unsafe {
+        let b = _mm256_broadcast_i64x2(a).as_i64x4();
+        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
+    }
 }

 /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
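The extract conversions a little further down keep the same structure: `IMM8` is masked down to the number of selectable 128-bit (or 256-bit) lanes and a `match` picks the corresponding `simd_shuffle!` index list, while `static_assert_uimm_bits!` rejects out-of-range immediates at compile time. A scalar sketch of that lane selection, with a hypothetical `extract_128_lane` standing in for `_mm512_extracti64x2_epi64`:

// Scalar model of 128-bit lane extraction from a 512-bit vector of i64s:
// the vector holds four lanes of two i64s each, and IMM8 & 3 picks one.
fn extract_128_lane<const IMM8: i32>(a: [i64; 8]) -> [i64; 2] {
    match IMM8 & 3 {
        0 => [a[0], a[1]],
        1 => [a[2], a[3]],
        2 => [a[4], a[5]],
        _ => [a[6], a[7]],
    }
}

fn main() {
    let a = [10, 11, 20, 21, 30, 31, 40, 41];
    // IMM8 = 2 selects the third 128-bit lane.
    assert_eq!(extract_128_lane::<2>(a), [30, 31]);
    // The `& 3` mirrors the masking in the diff; the real intrinsics additionally
    // reject out-of-range immediates via `static_assert_uimm_bits!`.
    assert_eq!(extract_128_lane::<6>(a), [30, 31]);
}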
@@ -1149,10 +1311,12 @@ pub unsafe fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { - let a = a.as_i64x2(); - let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); - transmute(b) +pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i64x2(); + let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]); + transmute(b) + } } /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k @@ -1162,9 +1326,11 @@ pub unsafe fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { - let b = _mm512_broadcast_i64x2(a).as_i64x8(); - transmute(simd_select_bitmask(k, b, src.as_i64x8())) +pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i64x2(a).as_i64x8(); + transmute(simd_select_bitmask(k, b, src.as_i64x8())) + } } /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k @@ -1174,9 +1340,11 @@ pub unsafe fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { - let b = _mm512_broadcast_i64x2(a).as_i64x8(); - transmute(simd_select_bitmask(k, b, i64x8::ZERO)) +pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let b = _mm512_broadcast_i64x2(a).as_i64x8(); + transmute(simd_select_bitmask(k, b, i64x8::ZERO)) + } } // Extract @@ -1189,11 +1357,13 @@ pub unsafe fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_extractf32x8_ps(a: __m512) -> __m256 { - static_assert_uimm_bits!(IMM8, 1); - match IMM8 & 1 { - 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), - _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), +pub fn _mm512_extractf32x8_ps(a: __m512) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + match IMM8 & 1 { + 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + } } } @@ -1207,14 +1377,12 @@ pub unsafe fn _mm512_extractf32x8_ps(a: __m512) -> __m256 { #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_extractf32x8_ps( - src: __m256, - k: __mmask8, - a: __m512, -) -> __m256 { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_extractf32x8_ps::(a); - transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8())) +pub fn _mm512_mask_extractf32x8_ps(src: __m256, k: __mmask8, a: __m512) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_extractf32x8_ps::(a); + transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8())) + } } /// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a, @@ -1227,10 +1395,12 @@ pub unsafe fn _mm512_mask_extractf32x8_ps( 
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_extractf32x8_ps(k: __mmask8, a: __m512) -> __m256 { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_extractf32x8_ps::(a); - transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO)) +pub fn _mm512_maskz_extractf32x8_ps(k: __mmask8, a: __m512) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_extractf32x8_ps::(a); + transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO)) + } } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, @@ -1241,11 +1411,13 @@ pub unsafe fn _mm512_maskz_extractf32x8_ps(k: __mmask8, a: __m5 #[target_feature(enable = "avx512dq,avx512vl")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_extractf64x2_pd(a: __m256d) -> __m128d { - static_assert_uimm_bits!(IMM8, 1); - match IMM8 & 1 { - 0 => simd_shuffle!(a, a, [0, 1]), - _ => simd_shuffle!(a, a, [2, 3]), +pub fn _mm256_extractf64x2_pd(a: __m256d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + match IMM8 & 1 { + 0 => simd_shuffle!(a, a, [0, 1]), + _ => simd_shuffle!(a, a, [2, 3]), + } } } @@ -1259,14 +1431,16 @@ pub unsafe fn _mm256_extractf64x2_pd(a: __m256d) -> __m128d { #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_extractf64x2_pd( +pub fn _mm256_mask_extractf64x2_pd( src: __m128d, k: __mmask8, a: __m256d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm256_extractf64x2_pd::(a); - transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm256_extractf64x2_pd::(a); + transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2())) + } } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, @@ -1279,10 +1453,12 @@ pub unsafe fn _mm256_mask_extractf64x2_pd( #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_extractf64x2_pd(k: __mmask8, a: __m256d) -> __m128d { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm256_extractf64x2_pd::(a); - transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO)) +pub fn _mm256_maskz_extractf64x2_pd(k: __mmask8, a: __m256d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm256_extractf64x2_pd::(a); + transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO)) + } } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, @@ -1293,13 +1469,15 @@ pub unsafe fn _mm256_maskz_extractf64x2_pd(k: __mmask8, a: __m2 #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_extractf64x2_pd(a: __m512d) -> __m128d { - static_assert_uimm_bits!(IMM8, 2); - match IMM8 & 3 { - 0 => simd_shuffle!(a, a, [0, 1]), - 1 => simd_shuffle!(a, a, [2, 3]), - 2 => simd_shuffle!(a, a, [4, 5]), - _ => simd_shuffle!(a, a, [6, 7]), +pub fn _mm512_extractf64x2_pd(a: __m512d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + match IMM8 & 3 { + 0 => simd_shuffle!(a, a, [0, 1]), + 1 => simd_shuffle!(a, a, 
[2, 3]), + 2 => simd_shuffle!(a, a, [4, 5]), + _ => simd_shuffle!(a, a, [6, 7]), + } } } @@ -1313,14 +1491,16 @@ pub unsafe fn _mm512_extractf64x2_pd(a: __m512d) -> __m128d { #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_extractf64x2_pd( +pub fn _mm512_mask_extractf64x2_pd( src: __m128d, k: __mmask8, a: __m512d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 2); - let b = _mm512_extractf64x2_pd::(a).as_f64x2(); - transmute(simd_select_bitmask(k, b, src.as_f64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let b = _mm512_extractf64x2_pd::(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, src.as_f64x2())) + } } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, @@ -1333,10 +1513,12 @@ pub unsafe fn _mm512_mask_extractf64x2_pd( #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_extractf64x2_pd(k: __mmask8, a: __m512d) -> __m128d { - static_assert_uimm_bits!(IMM8, 2); - let b = _mm512_extractf64x2_pd::(a).as_f64x2(); - transmute(simd_select_bitmask(k, b, f64x2::ZERO)) +pub fn _mm512_maskz_extractf64x2_pd(k: __mmask8, a: __m512d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let b = _mm512_extractf64x2_pd::(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) + } } /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores @@ -1347,14 +1529,16 @@ pub unsafe fn _mm512_maskz_extractf64x2_pd(k: __mmask8, a: __m5 #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_extracti32x8_epi32(a: __m512i) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let a = a.as_i32x16(); - let b: i32x8 = match IMM8 & 1 { - 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), - _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), - }; - transmute(b) +pub fn _mm512_extracti32x8_epi32(a: __m512i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let a = a.as_i32x16(); + let b: i32x8 = match IMM8 & 1 { + 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + }; + transmute(b) + } } /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores @@ -1366,14 +1550,16 @@ pub unsafe fn _mm512_extracti32x8_epi32(a: __m512i) -> __m256i #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_extracti32x8_epi32( +pub fn _mm512_mask_extracti32x8_epi32( src: __m256i, k: __mmask8, a: __m512i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_extracti32x8_epi32::(a).as_i32x8(); - transmute(simd_select_bitmask(k, b, src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_extracti32x8_epi32::(a).as_i32x8(); + transmute(simd_select_bitmask(k, b, src.as_i32x8())) + } } /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores @@ -1385,10 +1571,12 @@ pub unsafe fn _mm512_mask_extracti32x8_epi32( #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))] #[rustc_legacy_const_generics(2)] #[unstable(feature = 
"stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_extracti32x8_epi32(k: __mmask8, a: __m512i) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_extracti32x8_epi32::(a).as_i32x8(); - transmute(simd_select_bitmask(k, b, i32x8::ZERO)) +pub fn _mm512_maskz_extracti32x8_epi32(k: __mmask8, a: __m512i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_extracti32x8_epi32::(a).as_i32x8(); + transmute(simd_select_bitmask(k, b, i32x8::ZERO)) + } } /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores @@ -1399,12 +1587,14 @@ pub unsafe fn _mm512_maskz_extracti32x8_epi32(k: __mmask8, a: _ #[target_feature(enable = "avx512dq,avx512vl")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_extracti64x2_epi64(a: __m256i) -> __m128i { - static_assert_uimm_bits!(IMM8, 1); - let a = a.as_i64x4(); - match IMM8 & 1 { - 0 => simd_shuffle!(a, a, [0, 1]), - _ => simd_shuffle!(a, a, [2, 3]), +pub fn _mm256_extracti64x2_epi64(a: __m256i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let a = a.as_i64x4(); + match IMM8 & 1 { + 0 => simd_shuffle!(a, a, [0, 1]), + _ => simd_shuffle!(a, a, [2, 3]), + } } } @@ -1417,14 +1607,16 @@ pub unsafe fn _mm256_extracti64x2_epi64(a: __m256i) -> __m128i #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_extracti64x2_epi64( +pub fn _mm256_mask_extracti64x2_epi64( src: __m128i, k: __mmask8, a: __m256i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm256_extracti64x2_epi64::(a).as_i64x2(); - transmute(simd_select_bitmask(k, b, src.as_i64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm256_extracti64x2_epi64::(a).as_i64x2(); + transmute(simd_select_bitmask(k, b, src.as_i64x2())) + } } /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores @@ -1436,10 +1628,12 @@ pub unsafe fn _mm256_mask_extracti64x2_epi64( #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_extracti64x2_epi64(k: __mmask8, a: __m256i) -> __m128i { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm256_extracti64x2_epi64::(a).as_i64x2(); - transmute(simd_select_bitmask(k, b, i64x2::ZERO)) +pub fn _mm256_maskz_extracti64x2_epi64(k: __mmask8, a: __m256i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm256_extracti64x2_epi64::(a).as_i64x2(); + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) + } } /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores @@ -1450,14 +1644,16 @@ pub unsafe fn _mm256_maskz_extracti64x2_epi64(k: __mmask8, a: _ #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_extracti64x2_epi64(a: __m512i) -> __m128i { - static_assert_uimm_bits!(IMM8, 2); - let a = a.as_i64x8(); - match IMM8 & 3 { - 0 => simd_shuffle!(a, a, [0, 1]), - 1 => simd_shuffle!(a, a, [2, 3]), - 2 => simd_shuffle!(a, a, [4, 5]), - _ => simd_shuffle!(a, a, [6, 7]), +pub fn _mm512_extracti64x2_epi64(a: __m512i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let a = a.as_i64x8(); + match IMM8 & 3 { + 0 => simd_shuffle!(a, a, [0, 
1]), + 1 => simd_shuffle!(a, a, [2, 3]), + 2 => simd_shuffle!(a, a, [4, 5]), + _ => simd_shuffle!(a, a, [6, 7]), + } } } @@ -1470,14 +1666,16 @@ pub unsafe fn _mm512_extracti64x2_epi64(a: __m512i) -> __m128i #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_extracti64x2_epi64( +pub fn _mm512_mask_extracti64x2_epi64( src: __m128i, k: __mmask8, a: __m512i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 2); - let b = _mm512_extracti64x2_epi64::(a).as_i64x2(); - transmute(simd_select_bitmask(k, b, src.as_i64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let b = _mm512_extracti64x2_epi64::(a).as_i64x2(); + transmute(simd_select_bitmask(k, b, src.as_i64x2())) + } } /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores @@ -1489,10 +1687,12 @@ pub unsafe fn _mm512_mask_extracti64x2_epi64( #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_extracti64x2_epi64(k: __mmask8, a: __m512i) -> __m128i { - static_assert_uimm_bits!(IMM8, 2); - let b = _mm512_extracti64x2_epi64::(a).as_i64x2(); - transmute(simd_select_bitmask(k, b, i64x2::ZERO)) +pub fn _mm512_maskz_extracti64x2_epi64(k: __mmask8, a: __m512i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let b = _mm512_extracti64x2_epi64::(a).as_i64x2(); + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) + } } // Insert @@ -1505,20 +1705,26 @@ pub unsafe fn _mm512_maskz_extracti64x2_epi64(k: __mmask8, a: _ #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_insertf32x8(a: __m512, b: __m256) -> __m512 { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_castps256_ps512(b); - match IMM8 & 1 { - 0 => simd_shuffle!( - a, - b, - [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] - ), - _ => simd_shuffle!( - a, - b, - [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] - ), +pub fn _mm512_insertf32x8(a: __m512, b: __m256) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_castps256_ps512(b); + match IMM8 & 1 { + 0 => { + simd_shuffle!( + a, + b, + [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] + ) + } + _ => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] + ) + } + } } } @@ -1532,15 +1738,17 @@ pub unsafe fn _mm512_insertf32x8(a: __m512, b: __m256) -> __m51 #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_insertf32x8( +pub fn _mm512_mask_insertf32x8( src: __m512, k: __mmask16, a: __m512, b: __m256, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm512_insertf32x8::(a, b); - transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm512_insertf32x8::(a, b); + transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16())) + } } /// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point @@ -1553,14 +1761,12 @@ pub unsafe fn _mm512_mask_insertf32x8( #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", 
issue = "111137")] -pub unsafe fn _mm512_maskz_insertf32x8( - k: __mmask16, - a: __m512, - b: __m256, -) -> __m512 { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm512_insertf32x8::(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, c, f32x16::ZERO)) +pub fn _mm512_maskz_insertf32x8(k: __mmask16, a: __m512, b: __m256) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm512_insertf32x8::(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, c, f32x16::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point @@ -1571,12 +1777,14 @@ pub unsafe fn _mm512_maskz_insertf32x8( #[target_feature(enable = "avx512dq,avx512vl")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_insertf64x2(a: __m256d, b: __m128d) -> __m256d { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm256_castpd128_pd256(b); - match IMM8 & 1 { - 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), - _ => simd_shuffle!(a, b, [0, 1, 4, 5]), +pub fn _mm256_insertf64x2(a: __m256d, b: __m128d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm256_castpd128_pd256(b); + match IMM8 & 1 { + 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), + _ => simd_shuffle!(a, b, [0, 1, 4, 5]), + } } } @@ -1590,15 +1798,17 @@ pub unsafe fn _mm256_insertf64x2(a: __m256d, b: __m128d) -> __m #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_insertf64x2( +pub fn _mm256_mask_insertf64x2( src: __m256d, k: __mmask8, a: __m256d, b: __m128d, ) -> __m256d { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm256_insertf64x2::(a, b); - transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm256_insertf64x2::(a, b); + transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4())) + } } /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point @@ -1611,14 +1821,12 @@ pub unsafe fn _mm256_mask_insertf64x2( #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_insertf64x2( - k: __mmask8, - a: __m256d, - b: __m128d, -) -> __m256d { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm256_insertf64x2::(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, c, f64x4::ZERO)) +pub fn _mm256_maskz_insertf64x2(k: __mmask8, a: __m256d, b: __m128d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm256_insertf64x2::(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, c, f64x4::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point @@ -1629,14 +1837,16 @@ pub unsafe fn _mm256_maskz_insertf64x2( #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_insertf64x2(a: __m512d, b: __m128d) -> __m512d { - static_assert_uimm_bits!(IMM8, 2); - let b = _mm512_castpd128_pd512(b); - match IMM8 & 3 { - 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), +pub fn _mm512_insertf64x2(a: __m512d, b: __m128d) -> __m512d { + unsafe { + 
static_assert_uimm_bits!(IMM8, 2); + let b = _mm512_castpd128_pd512(b); + match IMM8 & 3 { + 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), + _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), + } } } @@ -1650,15 +1860,17 @@ pub unsafe fn _mm512_insertf64x2(a: __m512d, b: __m128d) -> __m #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_insertf64x2( +pub fn _mm512_mask_insertf64x2( src: __m512d, k: __mmask8, a: __m512d, b: __m128d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 2); - let c = _mm512_insertf64x2::(a, b); - transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let c = _mm512_insertf64x2::(a, b); + transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8())) + } } /// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point @@ -1671,14 +1883,12 @@ pub unsafe fn _mm512_mask_insertf64x2( #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_insertf64x2( - k: __mmask8, - a: __m512d, - b: __m128d, -) -> __m512d { - static_assert_uimm_bits!(IMM8, 2); - let c = _mm512_insertf64x2::(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, c, f64x8::ZERO)) +pub fn _mm512_maskz_insertf64x2(k: __mmask8, a: __m512d, b: __m128d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let c = _mm512_insertf64x2::(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, c, f64x8::ZERO)) + } } /// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the @@ -1689,23 +1899,29 @@ pub unsafe fn _mm512_maskz_insertf64x2( #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_inserti32x8(a: __m512i, b: __m256i) -> __m512i { - static_assert_uimm_bits!(IMM8, 1); - let a = a.as_i32x16(); - let b = _mm512_castsi256_si512(b).as_i32x16(); - let r: i32x16 = match IMM8 & 1 { - 0 => simd_shuffle!( - a, - b, - [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] - ), - _ => simd_shuffle!( - a, - b, - [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] - ), - }; - transmute(r) +pub fn _mm512_inserti32x8(a: __m512i, b: __m256i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let a = a.as_i32x16(); + let b = _mm512_castsi256_si512(b).as_i32x16(); + let r: i32x16 = match IMM8 & 1 { + 0 => { + simd_shuffle!( + a, + b, + [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15] + ) + } + _ => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23] + ) + } + }; + transmute(r) + } } /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the @@ -1718,15 +1934,17 @@ pub unsafe fn _mm512_inserti32x8(a: __m512i, b: __m256i) -> __m #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_inserti32x8( +pub fn _mm512_mask_inserti32x8( src: __m512i, k: __mmask16, a: __m512i, b: __m256i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm512_inserti32x8::(a, b); - 
transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm512_inserti32x8::(a, b); + transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16())) + } } /// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the @@ -1739,14 +1957,12 @@ pub unsafe fn _mm512_mask_inserti32x8( #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_inserti32x8( - k: __mmask16, - a: __m512i, - b: __m256i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm512_inserti32x8::(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, c, i32x16::ZERO)) +pub fn _mm512_maskz_inserti32x8(k: __mmask16, a: __m512i, b: __m256i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm512_inserti32x8::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, c, i32x16::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the @@ -1757,13 +1973,15 @@ pub unsafe fn _mm512_maskz_inserti32x8( #[target_feature(enable = "avx512dq,avx512vl")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_inserti64x2(a: __m256i, b: __m128i) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let a = a.as_i64x4(); - let b = _mm256_castsi128_si256(b).as_i64x4(); - match IMM8 & 1 { - 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), - _ => simd_shuffle!(a, b, [0, 1, 4, 5]), +pub fn _mm256_inserti64x2(a: __m256i, b: __m128i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let a = a.as_i64x4(); + let b = _mm256_castsi128_si256(b).as_i64x4(); + match IMM8 & 1 { + 0 => simd_shuffle!(a, b, [4, 5, 2, 3]), + _ => simd_shuffle!(a, b, [0, 1, 4, 5]), + } } } @@ -1777,15 +1995,17 @@ pub unsafe fn _mm256_inserti64x2(a: __m256i, b: __m128i) -> __m #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_inserti64x2( +pub fn _mm256_mask_inserti64x2( src: __m256i, k: __mmask8, a: __m256i, b: __m128i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm256_inserti64x2::(a, b); - transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm256_inserti64x2::(a, b); + transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4())) + } } /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the @@ -1798,14 +2018,12 @@ pub unsafe fn _mm256_mask_inserti64x2( #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_inserti64x2( - k: __mmask8, - a: __m256i, - b: __m128i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let c = _mm256_inserti64x2::(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, c, i64x4::ZERO)) +pub fn _mm256_maskz_inserti64x2(k: __mmask8, a: __m256i, b: __m128i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let c = _mm256_inserti64x2::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, c, i64x4::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the @@ -1816,15 +2034,17 @@ pub unsafe fn _mm256_maskz_inserti64x2( 
#[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_inserti64x2(a: __m512i, b: __m128i) -> __m512i { - static_assert_uimm_bits!(IMM8, 2); - let a = a.as_i64x8(); - let b = _mm512_castsi128_si512(b).as_i64x8(); - match IMM8 & 3 { - 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), +pub fn _mm512_inserti64x2(a: __m512i, b: __m128i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let a = a.as_i64x8(); + let b = _mm512_castsi128_si512(b).as_i64x8(); + match IMM8 & 3 { + 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]), + _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]), + } } } @@ -1838,15 +2058,17 @@ pub unsafe fn _mm512_inserti64x2(a: __m512i, b: __m128i) -> __m #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_inserti64x2( +pub fn _mm512_mask_inserti64x2( src: __m512i, k: __mmask8, a: __m512i, b: __m128i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 2); - let c = _mm512_inserti64x2::(a, b); - transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let c = _mm512_inserti64x2::(a, b); + transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8())) + } } /// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the @@ -1859,14 +2081,12 @@ pub unsafe fn _mm512_mask_inserti64x2( #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_inserti64x2( - k: __mmask8, - a: __m512i, - b: __m128i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 2); - let c = _mm512_inserti64x2::(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, c, i64x8::ZERO)) +pub fn _mm512_maskz_inserti64x2(k: __mmask8, a: __m512i, b: __m128i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let c = _mm512_inserti64x2::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, c, i64x8::ZERO)) + } } // Convert @@ -1886,9 +2106,11 @@ pub unsafe fn _mm512_maskz_inserti64x2( #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundepi64_pd(a: __m512i) -> __m512d { - static_assert_rounding!(ROUNDING); - transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING)) +pub fn _mm512_cvt_roundepi64_pd(a: __m512i) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING)) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -1907,14 +2129,16 @@ pub unsafe fn _mm512_cvt_roundepi64_pd(a: __m512i) -> __m51 #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundepi64_pd( +pub fn _mm512_mask_cvt_roundepi64_pd( src: __m512d, k: __mmask8, a: __m512i, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let b = 
_mm512_cvt_roundepi64_pd::(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepi64_pd::(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -1933,13 +2157,12 @@ pub unsafe fn _mm512_mask_cvt_roundepi64_pd( #[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundepi64_pd( - k: __mmask8, - a: __m512i, -) -> __m512d { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepi64_pd::(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, f64x8::ZERO)) +pub fn _mm512_maskz_cvt_roundepi64_pd(k: __mmask8, a: __m512i) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepi64_pd::(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -1950,8 +2173,8 @@ pub unsafe fn _mm512_maskz_cvt_roundepi64_pd( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtepi64_pd(a: __m128i) -> __m128d { - transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d { + unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -1963,9 +2186,11 @@ pub unsafe fn _mm_cvtepi64_pd(a: __m128i) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { - let b = _mm_cvtepi64_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, b, src.as_f64x2())) +pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepi64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, src.as_f64x2())) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -1976,9 +2201,11 @@ pub unsafe fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { - let b = _mm_cvtepi64_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, b, f64x2::ZERO)) +pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepi64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -1989,8 +2216,8 @@ pub unsafe fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { - transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) +pub 
fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { + unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2002,9 +2229,11 @@ pub unsafe fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { - let b = _mm256_cvtepi64_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, b, src.as_f64x4())) +pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepi64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, src.as_f64x4())) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2015,9 +2244,11 @@ pub unsafe fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { - let b = _mm256_cvtepi64_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, b, f64x4::ZERO)) +pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepi64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2028,8 +2259,8 @@ pub unsafe fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { - transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { + unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2041,9 +2272,11 @@ pub unsafe fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { - let b = _mm512_cvtepi64_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, src.as_f64x8())) +pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepi64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2054,9 +2287,11 @@ pub unsafe fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { - let b = _mm512_cvtepi64_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, f64x8::ZERO)) +pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = 
_mm512_cvtepi64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2074,9 +2309,11 @@ pub unsafe fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundepi64_ps(a: __m512i) -> __m256 { - static_assert_rounding!(ROUNDING); - transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING)) +pub fn _mm512_cvt_roundepi64_ps(a: __m512i) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING)) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2095,14 +2332,16 @@ pub unsafe fn _mm512_cvt_roundepi64_ps(a: __m512i) -> __m25 #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundepi64_ps( +pub fn _mm512_mask_cvt_roundepi64_ps( src: __m256, k: __mmask8, a: __m512i, ) -> __m256 { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepi64_ps::(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, src.as_f32x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepi64_ps::(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2121,13 +2360,12 @@ pub unsafe fn _mm512_mask_cvt_roundepi64_ps( #[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundepi64_ps( - k: __mmask8, - a: __m512i, -) -> __m256 { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepi64_ps::(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, f32x8::ZERO)) +pub fn _mm512_maskz_cvt_roundepi64_ps(k: __mmask8, a: __m512i) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepi64_ps::(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2138,7 +2376,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepi64_ps( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtepi64_ps(a: __m128i) -> __m128 { +pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 { _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a) } @@ -2151,8 +2389,8 @@ pub unsafe fn _mm_cvtepi64_ps(a: __m128i) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { - transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) +pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2163,7 +2401,7 @@ pub unsafe fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: 
__m128i) -> __m1 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { +pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a) } @@ -2175,8 +2413,8 @@ pub unsafe fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { - transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { + unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2188,9 +2426,11 @@ pub unsafe fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { - let b = _mm256_cvtepi64_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, b, src.as_f32x4())) +pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepi64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, src.as_f32x4())) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2201,9 +2441,11 @@ pub unsafe fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> _ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { - let b = _mm256_cvtepi64_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, b, f32x4::ZERO)) +pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepi64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, f32x4::ZERO)) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2214,8 +2456,8 @@ pub unsafe fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { - transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { + unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2227,9 +2469,11 @@ pub unsafe fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { - let b = _mm512_cvtepi64_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, src.as_f32x8())) +pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { + unsafe { + let b = 
_mm512_cvtepi64_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2240,9 +2484,11 @@ pub unsafe fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> _ #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { - let b = _mm512_cvtepi64_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, f32x8::ZERO)) +pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { + unsafe { + let b = _mm512_cvtepi64_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2260,9 +2506,11 @@ pub unsafe fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundepu64_pd(a: __m512i) -> __m512d { - static_assert_rounding!(ROUNDING); - transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING)) +pub fn _mm512_cvt_roundepu64_pd(a: __m512i) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING)) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2281,14 +2529,16 @@ pub unsafe fn _mm512_cvt_roundepu64_pd(a: __m512i) -> __m51 #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundepu64_pd( +pub fn _mm512_mask_cvt_roundepu64_pd( src: __m512d, k: __mmask8, a: __m512i, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepu64_pd::(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepu64_pd::(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2307,13 +2557,12 @@ pub unsafe fn _mm512_mask_cvt_roundepu64_pd( #[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundepu64_pd( - k: __mmask8, - a: __m512i, -) -> __m512d { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepu64_pd::(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, f64x8::ZERO)) +pub fn _mm512_maskz_cvt_roundepu64_pd(k: __mmask8, a: __m512i) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepu64_pd::(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2324,8 +2573,8 @@ pub unsafe fn _mm512_maskz_cvt_roundepu64_pd( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtepu64_pd(a: __m128i) -> __m128d { - transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) +pub fn 
_mm_cvtepu64_pd(a: __m128i) -> __m128d { + unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2337,9 +2586,11 @@ pub unsafe fn _mm_cvtepu64_pd(a: __m128i) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { - let b = _mm_cvtepu64_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, b, src.as_f64x2())) +pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepu64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, src.as_f64x2())) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2350,9 +2601,11 @@ pub unsafe fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { - let b = _mm_cvtepu64_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, b, f64x2::ZERO)) +pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let b = _mm_cvtepu64_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2363,8 +2616,8 @@ pub unsafe fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { - transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { + unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2376,9 +2629,11 @@ pub unsafe fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { - let b = _mm256_cvtepu64_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, b, src.as_f64x4())) +pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = _mm256_cvtepu64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, src.as_f64x4())) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2389,9 +2644,11 @@ pub unsafe fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { - let b = _mm256_cvtepu64_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, b, f64x4::ZERO)) +pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { + unsafe { + let b = 
_mm256_cvtepu64_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2402,8 +2659,8 @@ pub unsafe fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { - transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { + unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2415,9 +2672,11 @@ pub unsafe fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { - let b = _mm512_cvtepu64_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, src.as_f64x8())) +pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepu64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, src.as_f64x8())) + } } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2428,9 +2687,11 @@ pub unsafe fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtuqq2pd))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { - let b = _mm512_cvtepu64_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, b, f64x8::ZERO)) +pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { + unsafe { + let b = _mm512_cvtepu64_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2448,9 +2709,11 @@ pub unsafe fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundepu64_ps(a: __m512i) -> __m256 { - static_assert_rounding!(ROUNDING); - transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING)) +pub fn _mm512_cvt_roundepu64_ps(a: __m512i) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING)) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2469,14 +2732,16 @@ pub unsafe fn _mm512_cvt_roundepu64_ps(a: __m512i) -> __m25 #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundepu64_ps( +pub fn _mm512_mask_cvt_roundepu64_ps( src: __m256, k: __mmask8, a: __m512i, ) -> __m256 { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepu64_ps::(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, src.as_f32x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let b = 
_mm512_cvt_roundepu64_ps::(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2495,13 +2760,12 @@ pub unsafe fn _mm512_mask_cvt_roundepu64_ps( #[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundepu64_ps( - k: __mmask8, - a: __m512i, -) -> __m256 { - static_assert_rounding!(ROUNDING); - let b = _mm512_cvt_roundepu64_ps::(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, f32x8::ZERO)) +pub fn _mm512_maskz_cvt_roundepu64_ps(k: __mmask8, a: __m512i) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let b = _mm512_cvt_roundepu64_ps::(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2512,7 +2776,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu64_ps( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtepu64_ps(a: __m128i) -> __m128 { +pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 { _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a) } @@ -2525,8 +2789,8 @@ pub unsafe fn _mm_cvtepu64_ps(a: __m128i) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { - transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) +pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2537,7 +2801,7 @@ pub unsafe fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m1 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { +pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a) } @@ -2549,8 +2813,8 @@ pub unsafe fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { - transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { + unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2562,9 +2826,11 @@ pub unsafe fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 { - let b = _mm256_cvtepu64_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, b, src.as_f32x4())) +pub fn _mm256_mask_cvtepu64_ps(src: 
__m128, k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepu64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, src.as_f32x4())) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2575,9 +2841,11 @@ pub unsafe fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> _ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { - let b = _mm256_cvtepu64_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, b, f32x4::ZERO)) +pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { + unsafe { + let b = _mm256_cvtepu64_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, b, f32x4::ZERO)) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2588,8 +2856,8 @@ pub unsafe fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { - transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { + unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2601,9 +2869,11 @@ pub unsafe fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { - let b = _mm512_cvtepu64_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, src.as_f32x8())) +pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 { + unsafe { + let b = _mm512_cvtepu64_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, src.as_f32x8())) + } } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2614,9 +2884,11 @@ pub unsafe fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> _ #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtuqq2ps))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { - let b = _mm512_cvtepu64_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, b, f32x8::ZERO)) +pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { + unsafe { + let b = _mm512_cvtepu64_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2634,7 +2906,7 @@ pub unsafe fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundpd_epi64(a: __m512d) -> __m512i { +pub fn _mm512_cvt_roundpd_epi64(a: __m512d) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundpd_epi64::(_mm512_undefined_epi32(), 0xff, a) } @@ -2655,13 +2927,15 @@ pub unsafe 
fn _mm512_cvt_roundpd_epi64(a: __m512d) -> __m51 #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundpd_epi64( +pub fn _mm512_mask_cvt_roundpd_epi64( src: __m512i, k: __mmask8, a: __m512d, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2680,10 +2954,7 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epi64( #[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundpd_epi64( - k: __mmask8, - a: __m512d, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundpd_epi64(k: __mmask8, a: __m512d) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundpd_epi64::(_mm512_setzero_si512(), k, a) } @@ -2696,7 +2967,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epi64( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtpd_epi64(a: __m128d) -> __m128i { +pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i { _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a) } @@ -2709,8 +2980,8 @@ pub unsafe fn _mm_cvtpd_epi64(a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) +pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2721,7 +2992,7 @@ pub unsafe fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { +pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a) } @@ -2733,7 +3004,7 @@ pub unsafe fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { +pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a) } @@ -2746,8 +3017,8 @@ pub unsafe fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { - transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) +pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { 
transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2758,7 +3029,7 @@ pub unsafe fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { +pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a) } @@ -2770,7 +3041,7 @@ pub unsafe fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { +pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a) } @@ -2783,13 +3054,15 @@ pub unsafe fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { - transmute(vcvtpd2qq_512( - a.as_f64x8(), - src.as_i64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvtpd2qq_512( + a.as_f64x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2800,7 +3073,7 @@ pub unsafe fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { +pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a) } @@ -2819,7 +3092,7 @@ pub unsafe fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i { #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundps_epi64(a: __m256) -> __m512i { +pub fn _mm512_cvt_roundps_epi64(a: __m256) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundps_epi64::(_mm512_undefined_epi32(), 0xff, a) } @@ -2840,13 +3113,15 @@ pub unsafe fn _mm512_cvt_roundps_epi64(a: __m256) -> __m512 #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundps_epi64( +pub fn _mm512_mask_cvt_roundps_epi64( src: __m512i, k: __mmask8, a: __m256, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2865,10 +3140,7 @@ pub unsafe fn _mm512_mask_cvt_roundps_epi64( #[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))] 
#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundps_epi64( - k: __mmask8, - a: __m256, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundps_epi64(k: __mmask8, a: __m256) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundps_epi64::(_mm512_setzero_si512(), k, a) } @@ -2881,7 +3153,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epi64( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtps_epi64(a: __m128) -> __m128i { +pub fn _mm_cvtps_epi64(a: __m128) -> __m128i { _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a) } @@ -2894,8 +3166,8 @@ pub unsafe fn _mm_cvtps_epi64(a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) +pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2906,7 +3178,7 @@ pub unsafe fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m1 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i { +pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i { _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a) } @@ -2918,7 +3190,7 @@ pub unsafe fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtps_epi64(a: __m128) -> __m256i { +pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i { _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a) } @@ -2931,8 +3203,8 @@ pub unsafe fn _mm256_cvtps_epi64(a: __m128) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { - transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) +pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2943,7 +3215,7 @@ pub unsafe fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> _ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i { +pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i { _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a) } @@ -2955,7 +3227,7 @@ pub unsafe fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = 
"stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtps_epi64(a: __m256) -> __m512i { +pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i { _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a) } @@ -2968,13 +3240,15 @@ pub unsafe fn _mm512_cvtps_epi64(a: __m256) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { - transmute(vcvtps2qq_512( - a.as_f32x8(), - src.as_i64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvtps2qq_512( + a.as_f32x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers, @@ -2985,7 +3259,7 @@ pub unsafe fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> _ #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i { +pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i { _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a) } @@ -3004,7 +3278,7 @@ pub unsafe fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i { #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundpd_epu64(a: __m512d) -> __m512i { +pub fn _mm512_cvt_roundpd_epu64(a: __m512d) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundpd_epu64::(_mm512_undefined_epi32(), 0xff, a) } @@ -3025,13 +3299,15 @@ pub unsafe fn _mm512_cvt_roundpd_epu64(a: __m512d) -> __m51 #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundpd_epu64( +pub fn _mm512_mask_cvt_roundpd_epu64( src: __m512i, k: __mmask8, a: __m512d, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3050,10 +3326,7 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epu64( #[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundpd_epu64( - k: __mmask8, - a: __m512d, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundpd_epu64(k: __mmask8, a: __m512d) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundpd_epu64::(_mm512_setzero_si512(), k, a) } @@ -3066,7 +3339,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epu64( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtpd_epu64(a: __m128d) -> __m128i { +pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i { _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a) } @@ -3079,8 +3352,8 @@ pub unsafe fn _mm_cvtpd_epu64(a: __m128d) -> __m128i { #[target_feature(enable 
= "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) +pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3091,7 +3364,7 @@ pub unsafe fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i { +pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i { _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a) } @@ -3103,7 +3376,7 @@ pub unsafe fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i { +pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i { _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a) } @@ -3116,8 +3389,8 @@ pub unsafe fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { - transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) +pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3128,7 +3401,7 @@ pub unsafe fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i { +pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i { _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a) } @@ -3140,7 +3413,7 @@ pub unsafe fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i { +pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i { _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a) } @@ -3153,13 +3426,15 @@ pub unsafe fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { - transmute(vcvtpd2uqq_512( - a.as_f64x8(), - src.as_u64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvtpd2uqq_512( + a.as_f64x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, 
+ )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3170,7 +3445,7 @@ pub unsafe fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i { +pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i { _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a) } @@ -3189,7 +3464,7 @@ pub unsafe fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i { #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i { +pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a) } @@ -3210,13 +3485,15 @@ pub unsafe fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512 #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>( +pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>( src: __m512i, k: __mmask8, a: __m256, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3235,10 +3512,7 @@ pub unsafe fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>( #[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>( - k: __mmask8, - a: __m256, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a) } @@ -3251,7 +3525,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtps_epu64(a: __m128) -> __m128i { +pub fn _mm_cvtps_epu64(a: __m128) -> __m128i { _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a) } @@ -3264,8 +3538,8 @@ pub unsafe fn _mm_cvtps_epu64(a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) +pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3276,7 +3550,7 @@ pub unsafe fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m1 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn 
_mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i { +pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i { _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a) } @@ -3288,7 +3562,7 @@ pub unsafe fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtps_epu64(a: __m128) -> __m256i { +pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i { _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a) } @@ -3301,8 +3575,8 @@ pub unsafe fn _mm256_cvtps_epu64(a: __m128) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { - transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) +pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3313,7 +3587,7 @@ pub unsafe fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> _ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i { +pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i { _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a) } @@ -3325,7 +3599,7 @@ pub unsafe fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtps_epu64(a: __m256) -> __m512i { +pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i { _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a) } @@ -3338,13 +3612,15 @@ pub unsafe fn _mm512_cvtps_epu64(a: __m256) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { - transmute(vcvtps2uqq_512( - a.as_f32x8(), - src.as_u64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvtps2uqq_512( + a.as_f32x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers, @@ -3355,7 +3631,7 @@ pub unsafe fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> _ #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvtps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i { +pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i { _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a) } @@ -3369,7 +3645,7 @@ pub unsafe fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i { #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe 
fn _mm512_cvtt_roundpd_epi64(a: __m512d) -> __m512i { +pub fn _mm512_cvtt_roundpd_epi64(a: __m512d) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundpd_epi64::(_mm512_undefined_epi32(), 0xff, a) } @@ -3384,13 +3660,15 @@ pub unsafe fn _mm512_cvtt_roundpd_epi64(a: __m512d) -> __m512i { #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtt_roundpd_epi64( +pub fn _mm512_mask_cvtt_roundpd_epi64( src: __m512i, k: __mmask8, a: __m512d, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers @@ -3403,7 +3681,7 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epi64( #[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epi64(k: __mmask8, a: __m512d) -> __m512i { +pub fn _mm512_maskz_cvtt_roundpd_epi64(k: __mmask8, a: __m512d) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundpd_epi64::(_mm512_setzero_si512(), k, a) } @@ -3416,7 +3694,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundpd_epi64(k: __mmask8, a: __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvttpd_epi64(a: __m128d) -> __m128i { +pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i { _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a) } @@ -3429,8 +3707,8 @@ pub unsafe fn _mm_cvttpd_epi64(a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) +pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers @@ -3442,7 +3720,7 @@ pub unsafe fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i { +pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i { _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a) } @@ -3454,7 +3732,7 @@ pub unsafe fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i { +pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i { _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a) } @@ -3467,8 +3745,8 @@ pub unsafe fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn 
_mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { - transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) +pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers @@ -3480,7 +3758,7 @@ pub unsafe fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i { +pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i { _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a) } @@ -3492,7 +3770,7 @@ pub unsafe fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i { +pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i { _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a) } @@ -3505,13 +3783,15 @@ pub unsafe fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { - transmute(vcvttpd2qq_512( - a.as_f64x8(), - src.as_i64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvttpd2qq_512( + a.as_f64x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers @@ -3523,7 +3803,7 @@ pub unsafe fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttpd2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i { +pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i { _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a) } @@ -3537,7 +3817,7 @@ pub unsafe fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i { #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtt_roundps_epi64(a: __m256) -> __m512i { +pub fn _mm512_cvtt_roundps_epi64(a: __m256) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundps_epi64::(_mm512_undefined_epi32(), 0xff, a) } @@ -3552,13 +3832,15 @@ pub unsafe fn _mm512_cvtt_roundps_epi64(a: __m256) -> __m512i { #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtt_roundps_epi64( +pub fn _mm512_mask_cvtt_roundps_epi64( src: __m512i, k: __mmask8, a: __m256, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE)) + } } /// Convert packed 
single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers @@ -3571,7 +3853,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epi64( #[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtt_roundps_epi64(k: __mmask8, a: __m256) -> __m512i { +pub fn _mm512_maskz_cvtt_roundps_epi64(k: __mmask8, a: __m256) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundps_epi64::(_mm512_setzero_si512(), k, a) } @@ -3584,7 +3866,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epi64(k: __mmask8, a: __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvttps_epi64(a: __m128) -> __m128i { +pub fn _mm_cvttps_epi64(a: __m128) -> __m128i { _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a) } @@ -3597,8 +3879,8 @@ pub unsafe fn _mm_cvttps_epi64(a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) +pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers @@ -3610,7 +3892,7 @@ pub unsafe fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i { +pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i { _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a) } @@ -3622,7 +3904,7 @@ pub unsafe fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvttps_epi64(a: __m128) -> __m256i { +pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i { _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a) } @@ -3635,8 +3917,8 @@ pub unsafe fn _mm256_cvttps_epi64(a: __m128) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { - transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) +pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers @@ -3648,7 +3930,7 @@ pub unsafe fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i { +pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i { 
_mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a) } @@ -3660,7 +3942,7 @@ pub unsafe fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvttps_epi64(a: __m256) -> __m512i { +pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i { _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a) } @@ -3673,13 +3955,15 @@ pub unsafe fn _mm512_cvttps_epi64(a: __m256) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { - transmute(vcvttps2qq_512( - a.as_f32x8(), - src.as_i64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvttps2qq_512( + a.as_f32x8(), + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers @@ -3691,7 +3975,7 @@ pub unsafe fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttps2qq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i { +pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i { _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a) } @@ -3705,7 +3989,7 @@ pub unsafe fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i { #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtt_roundpd_epu64(a: __m512d) -> __m512i { +pub fn _mm512_cvtt_roundpd_epu64(a: __m512d) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundpd_epu64::(_mm512_undefined_epi32(), 0xff, a) } @@ -3720,13 +4004,15 @@ pub unsafe fn _mm512_cvtt_roundpd_epu64(a: __m512d) -> __m512i { #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtt_roundpd_epu64( +pub fn _mm512_mask_cvtt_roundpd_epu64( src: __m512i, k: __mmask8, a: __m512d, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -3739,7 +4025,7 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epu64( #[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epu64(k: __mmask8, a: __m512d) -> __m512i { +pub fn _mm512_maskz_cvtt_roundpd_epu64(k: __mmask8, a: __m512d) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundpd_epu64::(_mm512_setzero_si512(), k, a) } @@ -3752,7 +4038,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundpd_epu64(k: __mmask8, a: __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn 
_mm_cvttpd_epu64(a: __m128d) -> __m128i { +pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i { _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a) } @@ -3765,8 +4051,8 @@ pub unsafe fn _mm_cvttpd_epu64(a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) +pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -3778,7 +4064,7 @@ pub unsafe fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { +pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a) } @@ -3790,7 +4076,7 @@ pub unsafe fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { +pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a) } @@ -3803,8 +4089,8 @@ pub unsafe fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { - transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) +pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i { + unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -3816,7 +4102,7 @@ pub unsafe fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { +pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a) } @@ -3828,7 +4114,7 @@ pub unsafe fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { +pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a) } @@ -3841,13 +4127,15 @@ pub unsafe fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { - 
transmute(vcvttpd2uqq_512( - a.as_f64x8(), - src.as_u64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i { + unsafe { + transmute(vcvttpd2uqq_512( + a.as_f64x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -3859,7 +4147,7 @@ pub unsafe fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttpd2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i { +pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i { _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a) } @@ -3873,7 +4161,7 @@ pub unsafe fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i { #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtt_roundps_epu64(a: __m256) -> __m512i { +pub fn _mm512_cvtt_roundps_epu64(a: __m256) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundps_epu64::(_mm512_undefined_epi32(), 0xff, a) } @@ -3888,13 +4176,15 @@ pub unsafe fn _mm512_cvtt_roundps_epu64(a: __m256) -> __m512i { #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvtt_roundps_epu64( +pub fn _mm512_mask_cvtt_roundps_epu64( src: __m512i, k: __mmask8, a: __m256, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -3907,7 +4197,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu64( #[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvtt_roundps_epu64(k: __mmask8, a: __m256) -> __m512i { +pub fn _mm512_maskz_cvtt_roundps_epu64(k: __mmask8, a: __m256) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundps_epu64::(_mm512_setzero_si512(), k, a) } @@ -3920,7 +4210,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epu64(k: __mmask8, a: __ #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvttps_epu64(a: __m128) -> __m128i { +pub fn _mm_cvttps_epu64(a: __m128) -> __m128i { _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a) } @@ -3933,8 +4223,8 @@ pub unsafe fn _mm_cvttps_epu64(a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) +pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers @@ 
-3946,7 +4236,7 @@ pub unsafe fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { +pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a) } @@ -3958,7 +4248,7 @@ pub unsafe fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvttps_epu64(a: __m128) -> __m256i { +pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i { _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a) } @@ -3971,8 +4261,8 @@ pub unsafe fn _mm256_cvttps_epu64(a: __m128) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { - transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) +pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i { + unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -3984,7 +4274,7 @@ pub unsafe fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { +pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a) } @@ -3996,7 +4286,7 @@ pub unsafe fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvttps_epu64(a: __m256) -> __m512i { +pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i { _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a) } @@ -4009,13 +4299,15 @@ pub unsafe fn _mm512_cvttps_epu64(a: __m256) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { - transmute(vcvttps2uqq_512( - a.as_f32x8(), - src.as_u64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i { + unsafe { + transmute(vcvttps2uqq_512( + a.as_f32x8(), + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers @@ -4027,7 +4319,7 @@ pub unsafe fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vcvttps2uqq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { +pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { 
_mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a) } @@ -4041,8 +4333,8 @@ pub unsafe fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4054,9 +4346,11 @@ pub unsafe fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let b = _mm_mullo_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, b, src.as_i64x2())) +pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let b = _mm_mullo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, b, src.as_i64x2())) + } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4068,9 +4362,11 @@ pub unsafe fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let b = _mm_mullo_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, b, i64x2::ZERO)) +pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let b = _mm_mullo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) + } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4081,8 +4377,8 @@ pub unsafe fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4094,14 +4390,11 @@ pub unsafe fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_mullo_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let b = _mm256_mullo_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, b, src.as_i64x4())) +pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let b = _mm256_mullo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, b, src.as_i64x4())) + } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4113,9 +4406,11 @@ pub 
unsafe fn _mm256_mask_mullo_epi64( #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let b = _mm256_mullo_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, b, i64x4::ZERO)) +pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let b = _mm256_mullo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, b, i64x4::ZERO)) + } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4126,8 +4421,8 @@ pub unsafe fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4139,14 +4434,11 @@ pub unsafe fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_mullo_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - let b = _mm512_mullo_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, b, src.as_i64x8())) +pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let b = _mm512_mullo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, b, src.as_i64x8())) + } } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4158,9 +4450,11 @@ pub unsafe fn _mm512_mask_mullo_epi64( #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vpmullq))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let b = _mm512_mullo_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, b, i64x8::ZERO)) +pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let b = _mm512_mullo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, b, i64x8::ZERO)) + } } // Mask Registers @@ -4171,7 +4465,7 @@ pub unsafe fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> _ #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtmask8_u32(a: __mmask8) -> u32 { +pub fn _cvtmask8_u32(a: __mmask8) -> u32 { a as u32 } @@ -4181,7 +4475,7 @@ pub unsafe fn _cvtmask8_u32(a: __mmask8) -> u32 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtu32_mask8(a: u32) -> __mmask8 { +pub fn _cvtu32_mask8(a: u32) -> __mmask8 { a as __mmask8 } @@ -4191,7 +4485,7 @@ pub unsafe fn _cvtu32_mask8(a: u32) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _kadd_mask16(a: 
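// ---------------------------------------------------------------------------
// Illustration (not part of the diff): the mask/maskz convention used by
// `_mm_mask_mullo_epi64` / `_mm_maskz_mullo_epi64` above, written out as the
// scalar loop it is equivalent to. Helper names here are hypothetical.
pub mod mullo_semantics_sketch {
    // Per-lane model of the masked forms: wrapping low-64-bit product, with
    // inactive lanes taken from `src` (mask) or zeroed (maskz).
    pub fn mask_mullo_epi64_model(src: [i64; 2], k: u8, a: [i64; 2], b: [i64; 2]) -> [i64; 2] {
        let mut dst = [0i64; 2];
        for i in 0..2 {
            dst[i] = if (k >> i) & 1 == 1 {
                a[i].wrapping_mul(b[i]) // low 64 bits of the 128-bit product
            } else {
                src[i] // the `maskz` variants use 0 here instead of `src[i]`
            };
        }
        dst
    }
}
// ---------------------------------------------------------------------------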
__mmask16, b: __mmask16) -> __mmask16 { a + b } @@ -4201,7 +4495,7 @@ pub unsafe fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { +pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { a + b } @@ -4211,7 +4505,7 @@ pub unsafe fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { +pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { a & b } @@ -4221,7 +4515,7 @@ pub unsafe fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { +pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { _knot_mask8(a) & b } @@ -4231,7 +4525,7 @@ pub unsafe fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _knot_mask8(a: __mmask8) -> __mmask8 { +pub fn _knot_mask8(a: __mmask8) -> __mmask8 { a ^ 0b11111111 } @@ -4241,7 +4535,7 @@ pub unsafe fn _knot_mask8(a: __mmask8) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { +pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { a | b } @@ -4251,7 +4545,7 @@ pub unsafe fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { +pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { _knot_mask8(_kxor_mask8(a, b)) } @@ -4261,7 +4555,7 @@ pub unsafe fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { +pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 { a ^ b } @@ -4285,7 +4579,7 @@ pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { +pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { (_kor_mask8(a, b) == 0xff) as u8 } @@ -4296,7 +4590,7 @@ pub unsafe fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { +pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { (_kor_mask8(a, b) == 0) as u8 } @@ -4307,7 +4601,7 @@ pub unsafe fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftli_mask8(a: __mmask8) -> __mmask8 { +pub fn _kshiftli_mask8(a: __mmask8) -> __mmask8 { a << COUNT } @@ -4318,7 +4612,7 @@ pub unsafe fn _kshiftli_mask8(a: __mmask8) -> 
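// ---------------------------------------------------------------------------
// Illustration (not part of the diff): the 8-bit mask helpers above are plain
// integer bit-twiddling on `__mmask8` (a `u8` alias), so they can be checked
// against ordinary operators. A minimal sketch, assuming nightly with
// `#![feature(stdarch_x86_avx512)]`; the calls are safe because the function
// enables `avx512dq` itself.
#[cfg(target_arch = "x86_64")]
pub mod mask8_ops_sketch {
    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512dq")]
    pub fn mask8_identities(a: __mmask8, b: __mmask8) -> bool {
        _kand_mask8(a, b) == (a & b)
            && _kor_mask8(a, b) == (a | b)
            && _kxor_mask8(a, b) == (a ^ b)
            && _knot_mask8(a) == !a
            && _kandn_mask8(a, b) == (!a & b)      // NOT(a) AND b
            && _kxnor_mask8(a, b) == !(a ^ b)
            && _kshiftli_mask8::<1>(a) == (a << 1) // COUNT is a const generic
            && _kshiftri_mask8::<1>(a) == (a >> 1)
    }
}
// ---------------------------------------------------------------------------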
__mmask8 { #[target_feature(enable = "avx512dq")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftri_mask8(a: __mmask8) -> __mmask8 { +pub fn _kshiftri_mask8(a: __mmask8) -> __mmask8 { a >> COUNT } @@ -4355,7 +4649,7 @@ pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { +pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { (_kandn_mask16(a, b) == 0) as u8 } @@ -4366,7 +4660,7 @@ pub unsafe fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { +pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { (_kandn_mask8(a, b) == 0) as u8 } @@ -4377,7 +4671,7 @@ pub unsafe fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { +pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { (_kand_mask16(a, b) == 0) as u8 } @@ -4388,7 +4682,7 @@ pub unsafe fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { +pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 { (_kand_mask8(a, b) == 0) as u8 } @@ -4419,7 +4713,7 @@ pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { +pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { let zero = _mm_setzero_si128(); _mm_cmplt_epi32_mask(a, zero) } @@ -4431,7 +4725,7 @@ pub unsafe fn _mm_movepi32_mask(a: __m128i) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { +pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { let zero = _mm256_setzero_si256(); _mm256_cmplt_epi32_mask(a, zero) } @@ -4443,7 +4737,7 @@ pub unsafe fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { +pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { let zero = _mm512_setzero_si512(); _mm512_cmplt_epi32_mask(a, zero) } @@ -4455,7 +4749,7 @@ pub unsafe fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { +pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { let zero = _mm_setzero_si128(); _mm_cmplt_epi64_mask(a, zero) } @@ -4467,7 +4761,7 @@ pub unsafe fn _mm_movepi64_mask(a: __m128i) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 { +pub fn 
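// ---------------------------------------------------------------------------
// Illustration (not part of the diff): the `kortest*`/`ktest*` helpers above
// reduce two masks to a 0/1 flag, mirroring the CF/ZF outputs of the KORTEST
// and KTEST instructions. Scalar models only, matching the bodies shown in
// these hunks; names below are hypothetical.
pub mod ktest_semantics_sketch {
    // KORTEST: the "z" result is "OR of the masks is all zero",
    // the "c" result is "OR of the masks is all ones".
    pub fn kortestz_model(a: u8, b: u8) -> u8 {
        ((a | b) == 0) as u8
    }
    pub fn kortestc_model(a: u8, b: u8) -> u8 {
        ((a | b) == 0xff) as u8
    }
    // KTEST: the "z" result is "a AND b is zero",
    // the "c" result is "NOT(a) AND b is zero".
    pub fn ktestz_model(a: u8, b: u8) -> u8 {
        ((a & b) == 0) as u8
    }
    pub fn ktestc_model(a: u8, b: u8) -> u8 {
        ((!a & b) == 0) as u8
    }
}
// ---------------------------------------------------------------------------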
_mm256_movepi64_mask(a: __m256i) -> __mmask8 { let zero = _mm256_setzero_si256(); _mm256_cmplt_epi64_mask(a, zero) } @@ -4479,7 +4773,7 @@ pub unsafe fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 { #[inline] #[target_feature(enable = "avx512dq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { +pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { let zero = _mm512_setzero_si512(); _mm512_cmplt_epi64_mask(a, zero) } @@ -4492,7 +4786,7 @@ pub unsafe fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmovm2d))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_movm_epi32(k: __mmask8) -> __m128i { +pub fn _mm_movm_epi32(k: __mmask8) -> __m128i { let ones = _mm_set1_epi32(-1); _mm_maskz_mov_epi32(k, ones) } @@ -4505,7 +4799,7 @@ pub unsafe fn _mm_movm_epi32(k: __mmask8) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmovm2d))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_movm_epi32(k: __mmask8) -> __m256i { +pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i { let ones = _mm256_set1_epi32(-1); _mm256_maskz_mov_epi32(k, ones) } @@ -4518,7 +4812,7 @@ pub unsafe fn _mm256_movm_epi32(k: __mmask8) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vpmovm2d))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_movm_epi32(k: __mmask16) -> __m512i { +pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i { let ones = _mm512_set1_epi32(-1); _mm512_maskz_mov_epi32(k, ones) } @@ -4531,7 +4825,7 @@ pub unsafe fn _mm512_movm_epi32(k: __mmask16) -> __m512i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmovm2q))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_movm_epi64(k: __mmask8) -> __m128i { +pub fn _mm_movm_epi64(k: __mmask8) -> __m128i { let ones = _mm_set1_epi64x(-1); _mm_maskz_mov_epi64(k, ones) } @@ -4544,7 +4838,7 @@ pub unsafe fn _mm_movm_epi64(k: __mmask8) -> __m128i { #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vpmovm2q))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_movm_epi64(k: __mmask8) -> __m256i { +pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i { let ones = _mm256_set1_epi64x(-1); _mm256_maskz_mov_epi64(k, ones) } @@ -4557,7 +4851,7 @@ pub unsafe fn _mm256_movm_epi64(k: __mmask8) -> __m256i { #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vpmovm2q))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_movm_epi64(k: __mmask8) -> __m512i { +pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i { let ones = _mm512_set1_epi64(-1); _mm512_maskz_mov_epi64(k, ones) } @@ -4578,10 +4872,7 @@ pub unsafe fn _mm512_movm_epi64(k: __mmask8) -> __m512i { #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_range_round_pd( - a: __m512d, - b: __m512d, -) -> __m512d { +pub fn _mm512_range_round_pd(a: __m512d, b: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 4); static_assert_sae!(SAE); _mm512_mask_range_round_pd::(_mm512_setzero_pd(), 0xff, a, b) @@ -4602,22 +4893,24 @@ pub unsafe fn _mm512_range_round_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 
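// ---------------------------------------------------------------------------
// Illustration (not part of the diff): `_mm_movepi32_mask` and `_mm_movm_epi32`
// above are inverses whenever every lane is already 0 or -1. Scalar models of
// the two directions, per the implementations shown in this hunk (compare
// against zero, and maskz-select of all-ones).
pub mod movm_semantics_sketch {
    // Vector -> mask: bit i is set when lane i is negative (sign bit set).
    pub fn movepi32_mask_model(lanes: [i32; 4]) -> u8 {
        let mut k = 0u8;
        for (i, &x) in lanes.iter().enumerate() {
            if x < 0 {
                k |= 1 << i;
            }
        }
        k
    }
    // Mask -> vector: lane i is all ones (-1) when bit i is set, else zero.
    pub fn movm_epi32_model(k: u8) -> [i32; 4] {
        core::array::from_fn(|i| if (k >> i) & 1 == 1 { -1 } else { 0 })
    }
}
// ---------------------------------------------------------------------------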
8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_range_round_pd( +pub fn _mm512_mask_range_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 4); - static_assert_sae!(SAE); - transmute(vrangepd_512( - a.as_f64x8(), - b.as_f64x8(), - IMM8, - src.as_f64x8(), - k, - SAE, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + static_assert_sae!(SAE); + transmute(vrangepd_512( + a.as_f64x8(), + b.as_f64x8(), + IMM8, + src.as_f64x8(), + k, + SAE, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -4635,7 +4928,7 @@ pub unsafe fn _mm512_mask_range_round_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_range_round_pd( +pub fn _mm512_maskz_range_round_pd( k: __mmask8, a: __m512d, b: __m512d, @@ -4658,7 +4951,7 @@ pub unsafe fn _mm512_maskz_range_round_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_range_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_range_pd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 4); _mm_mask_range_pd::(_mm_setzero_pd(), 0xff, a, b) } @@ -4677,20 +4970,22 @@ pub unsafe fn _mm_range_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_range_pd( +pub fn _mm_mask_range_pd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangepd_128( - a.as_f64x2(), - b.as_f64x2(), - IMM8, - src.as_f64x2(), - k, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangepd_128( + a.as_f64x2(), + b.as_f64x2(), + IMM8, + src.as_f64x2(), + k, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -4707,7 +5002,7 @@ pub unsafe fn _mm_mask_range_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_range_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_maskz_range_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 4); _mm_mask_range_pd::(_mm_setzero_pd(), k, a, b) } @@ -4725,7 +5020,7 @@ pub unsafe fn _mm_maskz_range_pd(k: __mmask8, a: __m128d, b: __ #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_range_pd(a: __m256d, b: __m256d) -> __m256d { +pub fn _mm256_range_pd(a: __m256d, b: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 4); _mm256_mask_range_pd::(_mm256_setzero_pd(), 0xff, a, b) } @@ -4744,20 +5039,22 @@ pub unsafe fn _mm256_range_pd(a: __m256d, b: __m256d) -> __m256 #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_range_pd( +pub fn _mm256_mask_range_pd( src: __m256d, k: __mmask8, a: __m256d, b: __m256d, ) -> __m256d { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangepd_256( - a.as_f64x4(), 
- b.as_f64x4(), - IMM8, - src.as_f64x4(), - k, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangepd_256( + a.as_f64x4(), + b.as_f64x4(), + IMM8, + src.as_f64x4(), + k, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -4774,11 +5071,7 @@ pub unsafe fn _mm256_mask_range_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_range_pd( - k: __mmask8, - a: __m256d, - b: __m256d, -) -> __m256d { +pub fn _mm256_maskz_range_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 4); _mm256_mask_range_pd::(_mm256_setzero_pd(), k, a, b) } @@ -4796,7 +5089,7 @@ pub unsafe fn _mm256_maskz_range_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_range_pd(a: __m512d, b: __m512d) -> __m512d { +pub fn _mm512_range_pd(a: __m512d, b: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 4); _mm512_mask_range_pd::(_mm512_setzero_pd(), 0xff, a, b) } @@ -4815,21 +5108,23 @@ pub unsafe fn _mm512_range_pd(a: __m512d, b: __m512d) -> __m512 #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_range_pd( +pub fn _mm512_mask_range_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangepd_512( - a.as_f64x8(), - b.as_f64x8(), - IMM8, - src.as_f64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangepd_512( + a.as_f64x8(), + b.as_f64x8(), + IMM8, + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -4846,11 +5141,7 @@ pub unsafe fn _mm512_mask_range_pd( #[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_range_pd( - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { +pub fn _mm512_maskz_range_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 4); _mm512_mask_range_pd::(_mm512_setzero_pd(), k, a, b) } @@ -4869,10 +5160,7 @@ pub unsafe fn _mm512_maskz_range_pd( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_range_round_ps( - a: __m512, - b: __m512, -) -> __m512 { +pub fn _mm512_range_round_ps(a: __m512, b: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 4); static_assert_sae!(SAE); _mm512_mask_range_round_ps::(_mm512_setzero_ps(), 0xffff, a, b) @@ -4892,22 +5180,24 @@ pub unsafe fn _mm512_range_round_ps( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_range_round_ps( +pub fn _mm512_mask_range_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 4); - static_assert_sae!(SAE); - transmute(vrangeps_512( - a.as_f32x16(), - b.as_f32x16(), - IMM8, - src.as_f32x16(), - k, - SAE, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + 
static_assert_sae!(SAE); + transmute(vrangeps_512( + a.as_f32x16(), + b.as_f32x16(), + IMM8, + src.as_f32x16(), + k, + SAE, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -4924,7 +5214,7 @@ pub unsafe fn _mm512_mask_range_round_ps( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_range_round_ps( +pub fn _mm512_maskz_range_round_ps( k: __mmask16, a: __m512, b: __m512, @@ -4947,7 +5237,7 @@ pub unsafe fn _mm512_maskz_range_round_ps( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_range_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_range_ps(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 4); _mm_mask_range_ps::(_mm_setzero_ps(), 0xff, a, b) } @@ -4966,20 +5256,22 @@ pub unsafe fn _mm_range_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_range_ps( +pub fn _mm_mask_range_ps( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangeps_128( - a.as_f32x4(), - b.as_f32x4(), - IMM8, - src.as_f32x4(), - k, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangeps_128( + a.as_f32x4(), + b.as_f32x4(), + IMM8, + src.as_f32x4(), + k, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -4996,7 +5288,7 @@ pub unsafe fn _mm_mask_range_ps( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_range_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { +pub fn _mm_maskz_range_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 4); _mm_mask_range_ps::(_mm_setzero_ps(), k, a, b) } @@ -5014,7 +5306,7 @@ pub unsafe fn _mm_maskz_range_ps(k: __mmask8, a: __m128, b: __m #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_range_ps(a: __m256, b: __m256) -> __m256 { +pub fn _mm256_range_ps(a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 4); _mm256_mask_range_ps::(_mm256_setzero_ps(), 0xff, a, b) } @@ -5033,20 +5325,22 @@ pub unsafe fn _mm256_range_ps(a: __m256, b: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_range_ps( +pub fn _mm256_mask_range_ps( src: __m256, k: __mmask8, a: __m256, b: __m256, ) -> __m256 { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangeps_256( - a.as_f32x8(), - b.as_f32x8(), - IMM8, - src.as_f32x8(), - k, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangeps_256( + a.as_f32x8(), + b.as_f32x8(), + IMM8, + src.as_f32x8(), + k, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -5063,7 +5357,7 @@ pub unsafe fn _mm256_mask_range_ps( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", 
issue = "111137")] -pub unsafe fn _mm256_maskz_range_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { +pub fn _mm256_maskz_range_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 4); _mm256_mask_range_ps::(_mm256_setzero_ps(), k, a, b) } @@ -5081,7 +5375,7 @@ pub unsafe fn _mm256_maskz_range_ps(k: __mmask8, a: __m256, b: #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_range_ps(a: __m512, b: __m512) -> __m512 { +pub fn _mm512_range_ps(a: __m512, b: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 4); _mm512_mask_range_ps::(_mm512_setzero_ps(), 0xffff, a, b) } @@ -5100,21 +5394,23 @@ pub unsafe fn _mm512_range_ps(a: __m512, b: __m512) -> __m512 { #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_range_ps( +pub fn _mm512_mask_range_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangeps_512( - a.as_f32x16(), - b.as_f32x16(), - IMM8, - src.as_f32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangeps_512( + a.as_f32x16(), + b.as_f32x16(), + IMM8, + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed @@ -5131,7 +5427,7 @@ pub unsafe fn _mm512_mask_range_ps( #[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_range_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { +pub fn _mm512_maskz_range_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 4); _mm512_mask_range_ps::(_mm512_setzero_ps(), k, a, b) } @@ -5151,10 +5447,7 @@ pub unsafe fn _mm512_maskz_range_ps(k: __mmask16, a: __m512, b: #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_range_round_sd( - a: __m128d, - b: __m128d, -) -> __m128d { +pub fn _mm_range_round_sd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 4); static_assert_sae!(SAE); _mm_mask_range_round_sd::(_mm_setzero_pd(), 0xff, a, b) @@ -5176,22 +5469,24 @@ pub unsafe fn _mm_range_round_sd( #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_range_round_sd( +pub fn _mm_mask_range_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 4); - static_assert_sae!(SAE); - transmute(vrangesd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - IMM8, - SAE, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + static_assert_sae!(SAE); + transmute(vrangesd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + IMM8, + SAE, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower @@ -5210,7 +5505,7 @@ pub unsafe fn _mm_mask_range_round_sd( #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_range_round_sd( 
+pub fn _mm_maskz_range_round_sd( k: __mmask8, a: __m128d, b: __m128d, @@ -5235,21 +5530,23 @@ pub unsafe fn _mm_maskz_range_round_sd( #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_range_sd( +pub fn _mm_mask_range_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangesd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - IMM8, - _MM_FROUND_CUR_DIRECTION, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangesd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + IMM8, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower @@ -5267,7 +5564,7 @@ pub unsafe fn _mm_mask_range_sd( #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_range_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_maskz_range_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 4); _mm_mask_range_sd::(_mm_setzero_pd(), k, a, b) } @@ -5287,7 +5584,7 @@ pub unsafe fn _mm_maskz_range_sd(k: __mmask8, a: __m128d, b: __ #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_range_round_ss(a: __m128, b: __m128) -> __m128 { +pub fn _mm_range_round_ss(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 4); static_assert_sae!(SAE); _mm_mask_range_round_ss::(_mm_setzero_ps(), 0xff, a, b) @@ -5309,22 +5606,24 @@ pub unsafe fn _mm_range_round_ss(a: __m128, b: #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_range_round_ss( +pub fn _mm_mask_range_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 4); - static_assert_sae!(SAE); - transmute(vrangess( - a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - IMM8, - SAE, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + static_assert_sae!(SAE); + transmute(vrangess( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + IMM8, + SAE, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower @@ -5343,7 +5642,7 @@ pub unsafe fn _mm_mask_range_round_ss( #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_range_round_ss( +pub fn _mm_maskz_range_round_ss( k: __mmask8, a: __m128, b: __m128, @@ -5368,21 +5667,23 @@ pub unsafe fn _mm_maskz_range_round_ss( #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_range_ss( +pub fn _mm_mask_range_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 4); - transmute(vrangess( - a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - IMM8, - _MM_FROUND_CUR_DIRECTION, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 4); + transmute(vrangess( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + IMM8, + 
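// ---------------------------------------------------------------------------
// Usage sketch (not part of the diff) for the `vrange*` wrappers above. The
// const generics are unchanged by this change; only the outer `unsafe` is
// gone. The IMM8 reading used below (imm8[1:0]: 00 min, 01 max, 10 min of
// absolutes, 11 max of absolutes; imm8[3:2]: sign control, 10 = clear sign)
// follows Intel's VRANGEPD/VRANGEPS documentation as I recall it -- treat it
// as an assumption and verify against the Intrinsics Guide. Assumes nightly
// with `#![feature(stdarch_x86_avx512)]`.
#[cfg(target_arch = "x86_64")]
pub mod range_usage_sketch {
    use core::arch::x86_64::*;

    // Intended to select "max of absolute values, sign cleared".
    const RANGE_ABS_MAX: i32 = 0b1011;

    #[target_feature(enable = "avx512dq,avx512vl")]
    pub fn abs_max_pd(a: __m128d, b: __m128d) -> __m128d {
        // Safe call after this change; IMM8 is still a const generic.
        _mm_range_pd::<RANGE_ABS_MAX>(a, b)
    }

    #[target_feature(enable = "avx512dq")]
    pub fn abs_max_pd_512_noexc(a: __m512d, b: __m512d) -> __m512d {
        // The `_round_` variants take an extra SAE const; only
        // `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC` pass
        // `static_assert_sae!`.
        _mm512_range_round_pd::<RANGE_ABS_MAX, _MM_FROUND_NO_EXC>(a, b)
    }
}
// ---------------------------------------------------------------------------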
_MM_FROUND_CUR_DIRECTION, + )) + } } /// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower @@ -5400,7 +5701,7 @@ pub unsafe fn _mm_mask_range_ss( #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_range_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { +pub fn _mm_maskz_range_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 4); _mm_mask_range_ss::(_mm_setzero_ps(), k, a, b) } @@ -5425,7 +5726,7 @@ pub unsafe fn _mm_maskz_range_ss(k: __mmask8, a: __m128, b: __m #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_round_pd(a: __m512d) -> __m512d { +pub fn _mm512_reduce_round_pd(a: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm512_mask_reduce_round_pd::(_mm512_undefined_pd(), 0xff, a) @@ -5450,14 +5751,16 @@ pub unsafe fn _mm512_reduce_round_pd(a: __m512d #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_round_pd( +pub fn _mm512_mask_reduce_round_pd( src: __m512d, k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE)) + } } /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by @@ -5479,7 +5782,7 @@ pub unsafe fn _mm512_mask_reduce_round_pd( #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_reduce_round_pd( +pub fn _mm512_maskz_reduce_round_pd( k: __mmask8, a: __m512d, ) -> __m512d { @@ -5504,7 +5807,7 @@ pub unsafe fn _mm512_maskz_reduce_round_pd( #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_pd(a: __m128d) -> __m128d { +pub fn _mm_reduce_pd(a: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_pd::(_mm_undefined_pd(), 0xff, a) } @@ -5526,13 +5829,11 @@ pub unsafe fn _mm_reduce_pd(a: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_pd( - src: __m128d, - k: __mmask8, - a: __m128d, -) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k)) +pub fn _mm_mask_reduce_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k)) + } } /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by @@ -5552,7 +5853,7 @@ pub unsafe fn _mm_mask_reduce_pd( #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_reduce_pd(k: 
__mmask8, a: __m128d) -> __m128d { +pub fn _mm_maskz_reduce_pd(k: __mmask8, a: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_pd::(_mm_setzero_pd(), k, a) } @@ -5573,7 +5874,7 @@ pub unsafe fn _mm_maskz_reduce_pd(k: __mmask8, a: __m128d) -> _ #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_pd(a: __m256d) -> __m256d { +pub fn _mm256_reduce_pd(a: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_reduce_pd::(_mm256_undefined_pd(), 0xff, a) } @@ -5595,13 +5896,11 @@ pub unsafe fn _mm256_reduce_pd(a: __m256d) -> __m256d { #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_pd( - src: __m256d, - k: __mmask8, - a: __m256d, -) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k)) +pub fn _mm256_mask_reduce_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k)) + } } /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by @@ -5621,7 +5920,7 @@ pub unsafe fn _mm256_mask_reduce_pd( #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_reduce_pd(k: __mmask8, a: __m256d) -> __m256d { +pub fn _mm256_maskz_reduce_pd(k: __mmask8, a: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_reduce_pd::(_mm256_setzero_pd(), k, a) } @@ -5642,7 +5941,7 @@ pub unsafe fn _mm256_maskz_reduce_pd(k: __mmask8, a: __m256d) - #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_pd(a: __m512d) -> __m512d { +pub fn _mm512_reduce_pd(a: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_pd::(_mm512_undefined_pd(), 0xff, a) } @@ -5664,19 +5963,17 @@ pub unsafe fn _mm512_reduce_pd(a: __m512d) -> __m512d { #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_pd( - src: __m512d, - k: __mmask8, - a: __m512d, -) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreducepd_512( - a.as_f64x8(), - IMM8, - src.as_f64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_reduce_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducepd_512( + a.as_f64x8(), + IMM8, + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by @@ -5696,7 +5993,7 @@ pub unsafe fn _mm512_mask_reduce_pd( #[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_reduce_pd(k: __mmask8, a: __m512d) -> __m512d { +pub fn _mm512_maskz_reduce_pd(k: __mmask8, a: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_pd::(_mm512_setzero_pd(), k, a) } @@ -5719,7 +6016,7 @@ pub unsafe fn _mm512_maskz_reduce_pd(k: 
__mmask8, a: __m512d) - #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_round_ps(a: __m512) -> __m512 { +pub fn _mm512_reduce_round_ps(a: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm512_mask_reduce_round_ps::(_mm512_undefined_ps(), 0xffff, a) @@ -5744,14 +6041,16 @@ pub unsafe fn _mm512_reduce_round_ps(a: __m512) #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_round_ps( +pub fn _mm512_mask_reduce_round_ps( src: __m512, k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE)) + } } /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by @@ -5773,7 +6072,7 @@ pub unsafe fn _mm512_mask_reduce_round_ps( #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_reduce_round_ps( +pub fn _mm512_maskz_reduce_round_ps( k: __mmask16, a: __m512, ) -> __m512 { @@ -5798,7 +6097,7 @@ pub unsafe fn _mm512_maskz_reduce_round_ps( #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_ps(a: __m128) -> __m128 { +pub fn _mm_reduce_ps(a: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_ps::(_mm_undefined_ps(), 0xff, a) } @@ -5820,9 +6119,11 @@ pub unsafe fn _mm_reduce_ps(a: __m128) -> __m128 { #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k)) +pub fn _mm_mask_reduce_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k)) + } } /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by @@ -5842,7 +6143,7 @@ pub unsafe fn _mm_mask_reduce_ps(src: __m128, k: __mmask8, a: _ #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_reduce_ps(k: __mmask8, a: __m128) -> __m128 { +pub fn _mm_maskz_reduce_ps(k: __mmask8, a: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_ps::(_mm_setzero_ps(), k, a) } @@ -5863,7 +6164,7 @@ pub unsafe fn _mm_maskz_reduce_ps(k: __mmask8, a: __m128) -> __ #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_reduce_ps(a: __m256) -> __m256 { +pub fn _mm256_reduce_ps(a: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_reduce_ps::(_mm256_undefined_ps(), 0xff, a) } @@ -5885,13 +6186,11 @@ pub unsafe 
fn _mm256_reduce_ps(a: __m256) -> __m256 { #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_reduce_ps( - src: __m256, - k: __mmask8, - a: __m256, -) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k)) +pub fn _mm256_mask_reduce_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k)) + } } /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by @@ -5911,7 +6210,7 @@ pub unsafe fn _mm256_mask_reduce_ps( #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_reduce_ps(k: __mmask8, a: __m256) -> __m256 { +pub fn _mm256_maskz_reduce_ps(k: __mmask8, a: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_reduce_ps::(_mm256_setzero_ps(), k, a) } @@ -5932,7 +6231,7 @@ pub unsafe fn _mm256_maskz_reduce_ps(k: __mmask8, a: __m256) -> #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_ps(a: __m512) -> __m512 { +pub fn _mm512_reduce_ps(a: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_ps::(_mm512_undefined_ps(), 0xffff, a) } @@ -5954,19 +6253,17 @@ pub unsafe fn _mm512_reduce_ps(a: __m512) -> __m512 { #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_ps( - src: __m512, - k: __mmask16, - a: __m512, -) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreduceps_512( - a.as_f32x16(), - IMM8, - src.as_f32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_reduce_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreduceps_512( + a.as_f32x16(), + IMM8, + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by @@ -5986,7 +6283,7 @@ pub unsafe fn _mm512_mask_reduce_ps( #[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_reduce_ps(k: __mmask16, a: __m512) -> __m512 { +pub fn _mm512_maskz_reduce_ps(k: __mmask16, a: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_ps::(_mm512_setzero_ps(), k, a) } @@ -6010,10 +6307,7 @@ pub unsafe fn _mm512_maskz_reduce_ps(k: __mmask16, a: __m512) - #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_round_sd( - a: __m128d, - b: __m128d, -) -> __m128d { +pub fn _mm_reduce_round_sd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm_mask_reduce_round_sd::(_mm_undefined_pd(), 0xff, a, b) @@ -6039,22 +6333,24 @@ pub unsafe fn _mm_reduce_round_sd( #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub 
unsafe fn _mm_mask_reduce_round_sd( +pub fn _mm_mask_reduce_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - transmute(vreducesd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - IMM8, - SAE, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + transmute(vreducesd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + IMM8, + SAE, + )) + } } /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b @@ -6077,7 +6373,7 @@ pub unsafe fn _mm_mask_reduce_round_sd( #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_reduce_round_sd( +pub fn _mm_maskz_reduce_round_sd( k: __mmask8, a: __m128d, b: __m128d, @@ -6105,7 +6401,7 @@ pub unsafe fn _mm_maskz_reduce_round_sd( #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_sd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_reduce_sd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_sd::(_mm_undefined_pd(), 0xff, a, b) } @@ -6128,21 +6424,23 @@ pub unsafe fn _mm_reduce_sd(a: __m128d, b: __m128d) -> __m128d #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_sd( +pub fn _mm_mask_reduce_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreducesd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - IMM8, - _MM_FROUND_CUR_DIRECTION, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducesd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + IMM8, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b @@ -6163,7 +6461,7 @@ pub unsafe fn _mm_mask_reduce_sd( #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_reduce_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_maskz_reduce_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_sd::(_mm_setzero_pd(), k, a, b) } @@ -6188,7 +6486,7 @@ pub unsafe fn _mm_maskz_reduce_sd(k: __mmask8, a: __m128d, b: _ #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_round_ss(a: __m128, b: __m128) -> __m128 { +pub fn _mm_reduce_round_ss(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm_mask_reduce_round_ss::(_mm_undefined_ps(), 0xff, a, b) @@ -6214,22 +6512,24 @@ pub unsafe fn _mm_reduce_round_ss(a: __m128, b: #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_round_ss( +pub fn _mm_mask_reduce_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - transmute(vreducess( - 
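// ---------------------------------------------------------------------------
// Illustration (not part of the diff): what the `vreduce*` wrappers above
// compute. As documented by Intel, the result is `a - round(a)`, where the
// precision kept by the rounding comes from imm8[7:4] and the rounding mode
// from imm8[1:0]; treat that bit layout as an assumption and check the
// Intrinsics Guide. Scalar model for IMM8 = 0 (round to nearest integer,
// ties to even):
pub mod reduce_semantics_sketch {
    // reduce(x) with imm8 = 0 is the signed distance from x to the nearest
    // integer, e.g. reduce(2.75) == -0.25 and reduce(2.25) == 0.25.
    pub fn reduce_imm0_model(x: f64) -> f64 {
        x - x.round_ties_even()
    }
}
// ---------------------------------------------------------------------------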
a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - IMM8, - SAE, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + transmute(vreducess( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + IMM8, + SAE, + )) + } } /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b @@ -6252,7 +6552,7 @@ pub unsafe fn _mm_mask_reduce_round_ss( #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_reduce_round_ss( +pub fn _mm_maskz_reduce_round_ss( k: __mmask8, a: __m128, b: __m128, @@ -6280,7 +6580,7 @@ pub unsafe fn _mm_maskz_reduce_round_ss( #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_reduce_ss(a: __m128, b: __m128) -> __m128 { +pub fn _mm_reduce_ss(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_ss::(_mm_undefined_ps(), 0xff, a, b) } @@ -6303,21 +6603,23 @@ pub unsafe fn _mm_reduce_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_reduce_ss( +pub fn _mm_mask_reduce_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vreducess( - a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - IMM8, - _MM_FROUND_CUR_DIRECTION, - )) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vreducess( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + IMM8, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b @@ -6338,7 +6640,7 @@ pub unsafe fn _mm_mask_reduce_ss( #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_maskz_reduce_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { +pub fn _mm_maskz_reduce_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_ss::(_mm_setzero_ps(), k, a, b) } @@ -6364,7 +6666,7 @@ pub unsafe fn _mm_maskz_reduce_ss(k: __mmask8, a: __m128, b: __ #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_fpclass_pd_mask(a: __m128d) -> __mmask8 { +pub fn _mm_fpclass_pd_mask(a: __m128d) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_fpclass_pd_mask::(0xff, a) } @@ -6389,9 +6691,11 @@ pub unsafe fn _mm_fpclass_pd_mask(a: __m128d) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_fpclass_pd_mask(k1: __mmask8, a: __m128d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1)) +pub fn _mm_mask_fpclass_pd_mask(k1: __mmask8, a: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1)) + } } /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified @@ -6413,7 +6717,7 @@ pub unsafe fn _mm_mask_fpclass_pd_mask(k1: __mmask8, a: __m128d 
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_fpclass_pd_mask(a: __m256d) -> __mmask8 { +pub fn _mm256_fpclass_pd_mask(a: __m256d) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_fpclass_pd_mask::(0xff, a) } @@ -6438,9 +6742,11 @@ pub unsafe fn _mm256_fpclass_pd_mask(a: __m256d) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_fpclass_pd_mask(k1: __mmask8, a: __m256d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1)) +pub fn _mm256_mask_fpclass_pd_mask(k1: __mmask8, a: __m256d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1)) + } } /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified @@ -6462,7 +6768,7 @@ pub unsafe fn _mm256_mask_fpclass_pd_mask(k1: __mmask8, a: __m2 #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_fpclass_pd_mask(a: __m512d) -> __mmask8 { +pub fn _mm512_fpclass_pd_mask(a: __m512d) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_fpclass_pd_mask::(0xff, a) } @@ -6487,9 +6793,11 @@ pub unsafe fn _mm512_fpclass_pd_mask(a: __m512d) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_fpclass_pd_mask(k1: __mmask8, a: __m512d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1)) +pub fn _mm512_mask_fpclass_pd_mask(k1: __mmask8, a: __m512d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1)) + } } /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified @@ -6511,7 +6819,7 @@ pub unsafe fn _mm512_mask_fpclass_pd_mask(k1: __mmask8, a: __m5 #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_fpclass_ps_mask(a: __m128) -> __mmask8 { +pub fn _mm_fpclass_ps_mask(a: __m128) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_fpclass_ps_mask::(0xff, a) } @@ -6536,9 +6844,11 @@ pub unsafe fn _mm_fpclass_ps_mask(a: __m128) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_fpclass_ps_mask(k1: __mmask8, a: __m128) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1)) +pub fn _mm_mask_fpclass_ps_mask(k1: __mmask8, a: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1)) + } } /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified @@ -6560,7 +6870,7 @@ pub unsafe fn _mm_mask_fpclass_ps_mask(k1: __mmask8, a: __m128) #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_fpclass_ps_mask(a: 
__m256) -> __mmask8 { +pub fn _mm256_fpclass_ps_mask(a: __m256) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_fpclass_ps_mask::(0xff, a) } @@ -6585,9 +6895,11 @@ pub unsafe fn _mm256_fpclass_ps_mask(a: __m256) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_fpclass_ps_mask(k1: __mmask8, a: __m256) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1)) +pub fn _mm256_mask_fpclass_ps_mask(k1: __mmask8, a: __m256) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1)) + } } /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified @@ -6609,7 +6921,7 @@ pub unsafe fn _mm256_mask_fpclass_ps_mask(k1: __mmask8, a: __m2 #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_fpclass_ps_mask(a: __m512) -> __mmask16 { +pub fn _mm512_fpclass_ps_mask(a: __m512) -> __mmask16 { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_fpclass_ps_mask::(0xffff, a) } @@ -6634,9 +6946,11 @@ pub unsafe fn _mm512_fpclass_ps_mask(a: __m512) -> __mmask16 { #[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_fpclass_ps_mask(k1: __mmask16, a: __m512) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 8); - transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1)) +pub fn _mm512_mask_fpclass_ps_mask(k1: __mmask16, a: __m512) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1)) + } } /// Test the lower double-precision (64-bit) floating-point element in a for special categories specified @@ -6658,7 +6972,7 @@ pub unsafe fn _mm512_mask_fpclass_ps_mask(k1: __mmask16, a: __m #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_fpclass_sd_mask(a: __m128d) -> __mmask8 { +pub fn _mm_fpclass_sd_mask(a: __m128d) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_fpclass_sd_mask::(0xff, a) } @@ -6683,9 +6997,11 @@ pub unsafe fn _mm_fpclass_sd_mask(a: __m128d) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_fpclass_sd_mask(k1: __mmask8, a: __m128d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - vfpclasssd(a.as_f64x2(), IMM8, k1) +pub fn _mm_mask_fpclass_sd_mask(k1: __mmask8, a: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vfpclasssd(a.as_f64x2(), IMM8, k1) + } } /// Test the lower single-precision (32-bit) floating-point element in a for special categories specified @@ -6707,7 +7023,7 @@ pub unsafe fn _mm_mask_fpclass_sd_mask(k1: __mmask8, a: __m128d #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_fpclass_ss_mask(a: __m128) -> __mmask8 { +pub fn _mm_fpclass_ss_mask(a: __m128) -> __mmask8 { static_assert_uimm_bits!(IMM8, 8); _mm_mask_fpclass_ss_mask::(0xff, a) } @@ -6732,9 +7048,11 @@ pub unsafe fn _mm_fpclass_ss_mask(a: __m128) -> 
__mmask8 { #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_mask_fpclass_ss_mask(k1: __mmask8, a: __m128) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - vfpclassss(a.as_f32x4(), IMM8, k1) +pub fn _mm_mask_fpclass_ss_mask(k1: __mmask8, a: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vfpclassss(a.as_f32x4(), IMM8, k1) + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7b084a3ee7..d751b44119 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -17,10 +17,12 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { - let a = a.as_i32x16(); - let r = simd_select::(simd_lt(a, i32x16::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm512_abs_epi32(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let r = simd_select::(simd_lt(a, i32x16::ZERO), simd_neg(a), a); + transmute(r) + } } /// Computes the absolute value of packed 32-bit integers in `a`, and store the @@ -32,9 +34,11 @@ pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, abs, src.as_i32x16())) +pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, abs, src.as_i32x16())) + } } /// Computes the absolute value of packed 32-bit integers in `a`, and store the @@ -46,9 +50,11 @@ pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, abs, i32x16::ZERO)) +pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, abs, i32x16::ZERO)) + } } /// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
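(Not part of the diff.) A minimal usage sketch for the fpclass intrinsics converted above, assuming a nightly toolchain — these intrinsics were still gated behind `stdarch_x86_avx512` (tracking issue 111137) when this change was made — and an AVX-512DQ-capable CPU at run time. `classify_infinities` is an illustrative helper, not an stdarch API; what it demonstrates is that with the safe signatures, no `unsafe` block is needed inside a function that already carries a matching `#[target_feature]` attribute, while calls from ordinary code remain `unsafe` and are justified by runtime feature detection.

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Returns a 16-lane mask with a bit set for every +/-infinity in `v`.
/// IMM8 = 0x18 selects the vfpclassps categories "positive infinity" (0x08)
/// and "negative infinity" (0x10).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512dq")]
fn classify_infinities(v: __m512) -> __mmask16 {
    // Safe call: the surrounding #[target_feature] attribute provides the
    // guarantee that previously required an `unsafe` block at every call site.
    _mm512_fpclass_ps_mask::<0x18>(v)
}

#[cfg(target_arch = "x86_64")]
fn main() {
    if std::arch::is_x86_feature_detected!("avx512f")
        && std::arch::is_x86_feature_detected!("avx512dq")
    {
        // Calling a #[target_feature] function from ordinary code is still an
        // unsafe operation; the runtime checks above are what make it sound.
        let m = unsafe { classify_infinities(_mm512_set1_ps(f32::NEG_INFINITY)) };
        assert_eq!(m, 0xffff);
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}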
@@ -58,9 +64,11 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, abs, src.as_i32x8())) +pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, abs, src.as_i32x8())) + } } /// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -70,9 +78,11 @@ pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, abs, i32x8::ZERO)) +pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, abs, i32x8::ZERO)) + } } /// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -82,9 +92,11 @@ pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let abs = _mm_abs_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, abs, src.as_i32x4())) +pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, abs, src.as_i32x4())) + } } /// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -94,9 +106,11 @@ pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsd))] -pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { - let abs = _mm_abs_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, abs, i32x4::ZERO)) +pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, abs, i32x4::ZERO)) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. 
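(Not part of the diff; same nightly/`stdarch_x86_avx512` assumptions as the sketch above, and the helper names are illustrative only.) Once `_mm512_mask_abs_epi32` and its AVX-512VL siblings are safe, the mask semantics spelled out in the doc comments read directly as code:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Write-masking: lanes whose bit in `k` is clear keep the value from `src`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn masked_abs_512(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    _mm512_mask_abs_epi32(src, k, a)
}

/// Zero-masking, 256-bit: cleared lanes become 0. Needs AVX-512VL on top of
/// AVX-512F, mirroring the `enable = "avx512f,avx512vl"` attribute in the diff.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn maskz_abs_256(k: __mmask8, a: __m256i) -> __m256i {
    _mm256_maskz_abs_epi32(k, a)
}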
@@ -106,10 +120,12 @@ pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i { - let a = a.as_i64x8(); - let r = simd_select::(simd_lt(a, i64x8::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm512_abs_epi64(a: __m512i) -> __m512i { + unsafe { + let a = a.as_i64x8(); + let r = simd_select::(simd_lt(a, i64x8::ZERO), simd_neg(a), a); + transmute(r) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -119,9 +135,11 @@ pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, abs, src.as_i64x8())) +pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, abs, src.as_i64x8())) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -131,9 +149,11 @@ pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { - let abs = _mm512_abs_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, abs, i64x8::ZERO)) +pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let abs = _mm512_abs_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, abs, i64x8::ZERO)) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. @@ -143,10 +163,12 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i { - let a = a.as_i64x4(); - let r = simd_select::(simd_lt(a, i64x4::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm256_abs_epi64(a: __m256i) -> __m256i { + unsafe { + let a = a.as_i64x4(); + let r = simd_select::(simd_lt(a, i64x4::ZERO), simd_neg(a), a); + transmute(r) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -156,9 +178,11 @@ pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, abs, src.as_i64x4())) +pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, abs, src.as_i64x4())) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -168,9 +192,11 @@ pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i { - let abs = _mm256_abs_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, abs, i64x4::ZERO)) +pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let abs = _mm256_abs_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, abs, i64x4::ZERO)) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. @@ -180,10 +206,12 @@ pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i { - let a = a.as_i64x2(); - let r = simd_select::(simd_lt(a, i64x2::ZERO), simd_neg(a), a); - transmute(r) +pub fn _mm_abs_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i64x2(); + let r = simd_select::(simd_lt(a, i64x2::ZERO), simd_neg(a), a); + transmute(r) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -193,9 +221,11 @@ pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let abs = _mm_abs_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, abs, src.as_i64x2())) +pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, abs, src.as_i64x2())) + } } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
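(Illustrative sketch, not part of the diff; same assumptions as above.) `_mm512_mask_abs_epi64` follows the usual write-mask convention — lanes with a clear mask bit are copied from `src` — which the hypothetical check below makes concrete, using `_mm512_set1_epi64` and `_mm512_cmpeq_epi64_mask` from the same module:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn abs_epi64_demo() -> __mmask8 {
    let src = _mm512_set1_epi64(100);
    let a = _mm512_set1_epi64(-7);
    // Only the low four lanes take |a| = 7; the high four keep src's 100.
    let r = _mm512_mask_abs_epi64(src, 0b0000_1111, a);
    // Bits are set exactly where the mask let the absolute value through,
    // so this returns 0b0000_1111.
    _mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(7))
}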
@@ -205,9 +235,11 @@ pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpabsq))] -pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i { - let abs = _mm_abs_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, abs, i64x2::ZERO)) +pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let abs = _mm_abs_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, abs, i64x2::ZERO)) + } } /// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst. @@ -217,8 +249,8 @@ pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 { - simd_fabs(v2) +pub fn _mm512_abs_ps(v2: __m512) -> __m512 { + unsafe { simd_fabs(v2) } } /// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -228,8 +260,8 @@ pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 { - simd_select_bitmask(k, simd_fabs(v2), src) +pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, simd_fabs(v2), src) } } /// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst. @@ -239,8 +271,8 @@ pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d { - simd_fabs(v2) +pub fn _mm512_abs_pd(v2: __m512d) -> __m512d { + unsafe { simd_fabs(v2) } } /// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -250,8 +282,8 @@ pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d { - simd_select_bitmask(k, simd_fabs(v2), src) +pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, simd_fabs(v2), src) } } /// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
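(Sketch only, same assumptions.) Note that the diff asserts `vpandd`/`vpandq` rather than a dedicated abs instruction for `_mm512_abs_ps`/`_mm512_abs_pd`: floating-point absolute value is just a bitwise AND that clears the sign bit. A hypothetical caller:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn float_abs_demo() -> (__m512, __m512d) {
    // |x| for every f32 lane.
    let ps = _mm512_abs_ps(_mm512_set1_ps(-1.5));
    // Write-masked f64 variant: only the low two lanes take |-2.0| = 2.0;
    // the remaining lanes keep 9.0 from src.
    let pd = _mm512_mask_abs_pd(_mm512_set1_pd(9.0), 0b0000_0011, _mm512_set1_pd(-2.0));
    (ps, pd)
}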
@@ -261,9 +293,11 @@ pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] -pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - let mov = a.as_i32x16(); - transmute(simd_select_bitmask(k, mov, src.as_i32x16())) +pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i32x16(); + transmute(simd_select_bitmask(k, mov, src.as_i32x16())) + } } /// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -273,9 +307,11 @@ pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] -pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i { - let mov = a.as_i32x16(); - transmute(simd_select_bitmask(k, mov, i32x16::ZERO)) +pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i32x16(); + transmute(simd_select_bitmask(k, mov, i32x16::ZERO)) + } } /// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -285,9 +321,11 @@ pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] -pub unsafe fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let mov = a.as_i32x8(); - transmute(simd_select_bitmask(k, mov, src.as_i32x8())) +pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i32x8(); + transmute(simd_select_bitmask(k, mov, src.as_i32x8())) + } } /// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -297,9 +335,11 @@ pub unsafe fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] -pub unsafe fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i { - let mov = a.as_i32x8(); - transmute(simd_select_bitmask(k, mov, i32x8::ZERO)) +pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i32x8(); + transmute(simd_select_bitmask(k, mov, i32x8::ZERO)) + } } /// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -309,9 +349,11 @@ pub unsafe fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] -pub unsafe fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let mov = a.as_i32x4(); - transmute(simd_select_bitmask(k, mov, src.as_i32x4())) +pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i32x4(); + transmute(simd_select_bitmask(k, mov, src.as_i32x4())) + } } /// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -321,9 +363,11 @@ pub unsafe fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] -pub unsafe fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i { - let mov = a.as_i32x4(); - transmute(simd_select_bitmask(k, mov, i32x4::ZERO)) +pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i32x4(); + transmute(simd_select_bitmask(k, mov, i32x4::ZERO)) + } } /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -333,9 +377,11 @@ pub unsafe fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] -pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - let mov = a.as_i64x8(); - transmute(simd_select_bitmask(k, mov, src.as_i64x8())) +pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i64x8(); + transmute(simd_select_bitmask(k, mov, src.as_i64x8())) + } } /// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -345,9 +391,11 @@ pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] -pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i { - let mov = a.as_i64x8(); - transmute(simd_select_bitmask(k, mov, i64x8::ZERO)) +pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + let mov = a.as_i64x8(); + transmute(simd_select_bitmask(k, mov, i64x8::ZERO)) + } } /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
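(Sketch only, same assumptions; helper names are made up.) The `mask_mov`/`maskz_mov` family is a per-lane select — the closest scalar analogue is `if bit { a } else { src_or_zero }` in every lane — so the converted functions compose naturally as safe building blocks:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Lane i comes from `a` when bit i of `k` is set, otherwise from `src`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn blend_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    _mm512_mask_mov_epi32(src, k, a)
}

/// Zero-masking flavour: cleared lanes become 0 instead of coming from `src`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn keep_epi64(k: __mmask8, a: __m512i) -> __m512i {
    _mm512_maskz_mov_epi64(k, a)
}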
@@ -357,9 +405,11 @@ pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] -pub unsafe fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - let mov = a.as_i64x4(); - transmute(simd_select_bitmask(k, mov, src.as_i64x4())) +pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i64x4(); + transmute(simd_select_bitmask(k, mov, src.as_i64x4())) + } } /// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -369,9 +419,11 @@ pub unsafe fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] -pub unsafe fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i { - let mov = a.as_i64x4(); - transmute(simd_select_bitmask(k, mov, i64x4::ZERO)) +pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let mov = a.as_i64x4(); + transmute(simd_select_bitmask(k, mov, i64x4::ZERO)) + } } /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -381,9 +433,11 @@ pub unsafe fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] -pub unsafe fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let mov = a.as_i64x2(); - transmute(simd_select_bitmask(k, mov, src.as_i64x2())) +pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i64x2(); + transmute(simd_select_bitmask(k, mov, src.as_i64x2())) + } } /// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -393,9 +447,11 @@ pub unsafe fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] -pub unsafe fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i { - let mov = a.as_i64x2(); - transmute(simd_select_bitmask(k, mov, i64x2::ZERO)) +pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let mov = a.as_i64x2(); + transmute(simd_select_bitmask(k, mov, i64x2::ZERO)) + } } /// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -405,9 +461,11 @@ pub unsafe fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - let mov = a.as_f32x16(); - transmute(simd_select_bitmask(k, mov, src.as_f32x16())) +pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov = a.as_f32x16(); + transmute(simd_select_bitmask(k, mov, src.as_f32x16())) + } } /// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -417,9 +475,11 @@ pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 { - let mov = a.as_f32x16(); - transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) +pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov = a.as_f32x16(); + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) + } } /// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -429,9 +489,11 @@ pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - let mov = a.as_f32x8(); - transmute(simd_select_bitmask(k, mov, src.as_f32x8())) +pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = a.as_f32x8(); + transmute(simd_select_bitmask(k, mov, src.as_f32x8())) + } } /// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -441,9 +503,11 @@ pub unsafe fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 { - let mov = a.as_f32x8(); - transmute(simd_select_bitmask(k, mov, f32x8::ZERO)) +pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = a.as_f32x8(); + transmute(simd_select_bitmask(k, mov, f32x8::ZERO)) + } } /// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -453,9 +517,11 @@ pub unsafe fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - let mov = a.as_f32x4(); - transmute(simd_select_bitmask(k, mov, src.as_f32x4())) +pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = a.as_f32x4(); + transmute(simd_select_bitmask(k, mov, src.as_f32x4())) + } } /// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -465,9 +531,11 @@ pub unsafe fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] -pub unsafe fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 { - let mov = a.as_f32x4(); - transmute(simd_select_bitmask(k, mov, f32x4::ZERO)) +pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = a.as_f32x4(); + transmute(simd_select_bitmask(k, mov, f32x4::ZERO)) + } } /// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -477,9 +545,11 @@ pub unsafe fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] -pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - let mov = a.as_f64x8(); - transmute(simd_select_bitmask(k, mov, src.as_f64x8())) +pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov = a.as_f64x8(); + transmute(simd_select_bitmask(k, mov, src.as_f64x8())) + } } /// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -489,9 +559,11 @@ pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] -pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d { - let mov = a.as_f64x8(); - transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) +pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov = a.as_f64x8(); + transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) + } } /// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
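(Sketch only, same assumptions and made-up names.) The floating-point moves work the same way; the 128- and 256-bit forms additionally require AVX-512VL, as the `target_feature` attributes in the diff indicate:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// 128-bit single-precision lane select (AVX-512F + AVX-512VL).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn blend_ps_128(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    _mm_mask_mov_ps(src, k, a)
}

/// 512-bit double-precision zero-masking variant (AVX-512F only).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn keep_pd_512(k: __mmask8, a: __m512d) -> __m512d {
    _mm512_maskz_mov_pd(k, a)
}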
@@ -501,9 +573,11 @@ pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] -pub unsafe fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - let mov = a.as_f64x4(); - transmute(simd_select_bitmask(k, mov, src.as_f64x4())) +pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = a.as_f64x4(); + transmute(simd_select_bitmask(k, mov, src.as_f64x4())) + } } /// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -513,9 +587,11 @@ pub unsafe fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] -pub unsafe fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d { - let mov = a.as_f64x4(); - transmute(simd_select_bitmask(k, mov, f64x4::ZERO)) +pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = a.as_f64x4(); + transmute(simd_select_bitmask(k, mov, f64x4::ZERO)) + } } /// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -525,9 +601,11 @@ pub unsafe fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] -pub unsafe fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - let mov = a.as_f64x2(); - transmute(simd_select_bitmask(k, mov, src.as_f64x2())) +pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = a.as_f64x2(); + transmute(simd_select_bitmask(k, mov, src.as_f64x2())) + } } /// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -537,9 +615,11 @@ pub unsafe fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] -pub unsafe fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d { - let mov = a.as_f64x2(); - transmute(simd_select_bitmask(k, mov, f64x2::ZERO)) +pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = a.as_f64x2(); + transmute(simd_select_bitmask(k, mov, f64x2::ZERO)) + } } /// Add packed 32-bit integers in a and b, and store the results in dst. @@ -549,8 +629,8 @@ pub unsafe fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_add(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) } } /// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -560,9 +640,11 @@ pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, add, src.as_i32x16())) +pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, add, src.as_i32x16())) + } } /// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -572,9 +654,11 @@ pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, add, i32x16::ZERO)) +pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, add, i32x16::ZERO)) + } } /// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -584,9 +668,11 @@ pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, add, src.as_i32x8())) +pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, add, src.as_i32x8())) + } } /// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -596,9 +682,11 @@ pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, add, i32x8::ZERO)) +pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, add, i32x8::ZERO)) + } } /// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -608,9 +696,11 @@ pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, add, src.as_i32x4())) +pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, add, src.as_i32x4())) + } } /// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -620,9 +710,11 @@ pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddd))] -pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, add, i32x4::ZERO)) +pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, add, i32x4::ZERO)) + } } /// Add packed 64-bit integers in a and b, and store the results in dst. @@ -632,8 +724,8 @@ pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_add(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) } } /// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -643,9 +735,11 @@ pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, add, src.as_i64x8())) +pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, add, src.as_i64x8())) + } } /// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
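(Sketch only, same assumptions; `masked_sums` is a made-up name.) The two masking flavours of the integer adds differ only in what happens to cleared lanes — copied from `src` versus forced to zero:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn masked_sums() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(40);
    let b = _mm512_set1_epi32(2);
    // Write-masked: the upper eight lanes keep a's 40 instead of the sum 42.
    let keep = _mm512_mask_add_epi32(a, 0x00ff, a, b);
    // Zero-masked: the upper eight lanes become 0.
    let zero = _mm512_maskz_add_epi32(0x00ff, a, b);
    (keep, zero)
}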
@@ -655,9 +749,11 @@ pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let add = _mm512_add_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, add, i64x8::ZERO)) +pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let add = _mm512_add_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, add, i64x8::ZERO)) + } } /// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -667,9 +763,11 @@ pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, add, src.as_i64x4())) +pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, add, src.as_i64x4())) + } } /// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -679,9 +777,11 @@ pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let add = _mm256_add_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, add, i64x4::ZERO)) +pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let add = _mm256_add_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, add, i64x4::ZERO)) + } } /// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -691,9 +791,11 @@ pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, add, src.as_i64x2())) +pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, add, src.as_i64x2())) + } } /// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -703,9 +805,11 @@ pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpaddq))] -pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let add = _mm_add_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, add, i64x2::ZERO)) +pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let add = _mm_add_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, add, i64x2::ZERO)) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst. @@ -715,8 +819,8 @@ pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_add(a.as_f32x16(), b.as_f32x16())) +pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -726,9 +830,11 @@ pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let add = _mm512_add_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, add, src.as_f32x16())) +pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let add = _mm512_add_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, add, src.as_f32x16())) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -738,9 +844,11 @@ pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let add = _mm512_add_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, add, f32x16::ZERO)) +pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let add = _mm512_add_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, add, f32x16::ZERO)) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -750,9 +858,11 @@ pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let add = _mm256_add_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, add, src.as_f32x8())) +pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let add = _mm256_add_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, add, src.as_f32x8())) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -762,9 +872,11 @@ pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let add = _mm256_add_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, add, f32x8::ZERO)) +pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let add = _mm256_add_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, add, f32x8::ZERO)) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -774,9 +886,11 @@ pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let add = _mm_add_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, add, src.as_f32x4())) +pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let add = _mm_add_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, add, src.as_f32x4())) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -786,9 +900,11 @@ pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps))] -pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let add = _mm_add_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, add, f32x4::ZERO)) +pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let add = _mm_add_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, add, f32x4::ZERO)) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst. 
@@ -798,8 +914,8 @@ pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_add(a.as_f64x8(), b.as_f64x8())) +pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -809,9 +925,11 @@ pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let add = _mm512_add_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, add, src.as_f64x8())) +pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let add = _mm512_add_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, add, src.as_f64x8())) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -821,9 +939,11 @@ pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let add = _mm512_add_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, add, f64x8::ZERO)) +pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let add = _mm512_add_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, add, f64x8::ZERO)) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -833,9 +953,11 @@ pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let add = _mm256_add_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, add, src.as_f64x4())) +pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let add = _mm256_add_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, add, src.as_f64x4())) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
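(Sketch only, same assumptions; the helper and the `_mm512_cmpeq_pd_mask` check are only there to round out the example.) The double-precision adds behave identically, e.g. a zero-masked add whose surviving lanes can be verified with a compare mask:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn maskz_add_pd_demo() -> __mmask8 {
    let a = _mm512_set1_pd(1.25);
    let b = _mm512_set1_pd(0.75);
    // Zero-masked add: only the low four lanes hold 2.0, the rest are 0.0.
    let r = _mm512_maskz_add_pd(0b0000_1111, a, b);
    // Returns 0b0000_1111: bits set exactly where the sum survived the mask.
    _mm512_cmpeq_pd_mask(r, _mm512_set1_pd(2.0))
}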
@@ -845,9 +967,11 @@ pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let add = _mm256_add_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, add, f64x4::ZERO)) +pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let add = _mm256_add_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, add, f64x4::ZERO)) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -857,9 +981,11 @@ pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let add = _mm_add_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, add, src.as_f64x2())) +pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let add = _mm_add_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, add, src.as_f64x2())) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -869,9 +995,11 @@ pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd))] -pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let add = _mm_add_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, add, f64x2::ZERO)) +pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let add = _mm_add_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, add, f64x2::ZERO)) + } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst. @@ -881,8 +1009,8 @@ pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -892,9 +1020,11 @@ pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, sub, src.as_i32x16())) +pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, sub, src.as_i32x16())) + } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -904,9 +1034,11 @@ pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, sub, i32x16::ZERO)) +pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, sub, i32x16::ZERO)) + } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -916,9 +1048,11 @@ pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, sub, src.as_i32x8())) +pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, sub, src.as_i32x8())) + } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -928,9 +1062,11 @@ pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, sub, i32x8::ZERO)) +pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, sub, i32x8::ZERO)) + } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
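Because the wrappers are now safe `fn`s, code that is itself compiled with the feature enabled can call them without any `unsafe` at all. A hypothetical helper (not part of stdarch) under that assumption:

use core::arch::x86_64::*;

/// Keeps only the lanes of `a - b` selected by `k`, zeroing the rest.
/// No `unsafe` block is needed here: the enclosing function already
/// guarantees AVX-512F, and after this patch the wrapper itself is safe.
#[target_feature(enable = "avx512f")]
fn masked_diff(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    _mm512_maskz_sub_epi32(k, a, b)
}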
@@ -940,9 +1076,11 @@ pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, sub, src.as_i32x4())) +pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, sub, src.as_i32x4())) + } } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -952,9 +1090,11 @@ pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubd))] -pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, sub, i32x4::ZERO)) +pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, sub, i32x4::ZERO)) + } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst. @@ -964,8 +1104,8 @@ pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -975,9 +1115,11 @@ pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, sub, src.as_i64x8())) +pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, sub, src.as_i64x8())) + } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -987,9 +1129,11 @@ pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let sub = _mm512_sub_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, sub, i64x8::ZERO)) +pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let sub = _mm512_sub_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, sub, i64x8::ZERO)) + } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -999,9 +1143,11 @@ pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, sub, src.as_i64x4())) +pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, sub, src.as_i64x4())) + } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1011,9 +1157,11 @@ pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let sub = _mm256_sub_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, sub, i64x4::ZERO)) +pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let sub = _mm256_sub_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, sub, i64x4::ZERO)) + } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1023,9 +1171,11 @@ pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, sub, src.as_i64x2())) +pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, sub, src.as_i64x2())) + } } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
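All of the masked integer forms follow the same shape: compute the full-width result, then pick per lane between it and either `src` or zero. A plain-Rust reference model of `_mm512_mask_sub_epi64`, illustrative only; the wrapping behaviour matches the vector code above.

// Lane i takes the wrapped difference when bit i of `k` is set; otherwise the
// corresponding element of `src` is passed through unchanged.
fn mask_sub_epi64_model(src: [i64; 8], k: u8, a: [i64; 8], b: [i64; 8]) -> [i64; 8] {
    let mut out = [0i64; 8];
    for i in 0..8 {
        out[i] = if (k >> i) & 1 == 1 {
            a[i].wrapping_sub(b[i])
        } else {
            src[i]
        };
    }
    out
}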
@@ -1035,9 +1185,11 @@ pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsubq))] -pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let sub = _mm_sub_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, sub, i64x2::ZERO)) +pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let sub = _mm_sub_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, sub, i64x2::ZERO)) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst. @@ -1047,8 +1199,8 @@ pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) +pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1058,9 +1210,11 @@ pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let sub = _mm512_sub_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, sub, src.as_f32x16())) +pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let sub = _mm512_sub_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, sub, src.as_f32x16())) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1070,9 +1224,11 @@ pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let sub = _mm512_sub_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, sub, f32x16::ZERO)) +pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let sub = _mm512_sub_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, sub, f32x16::ZERO)) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1082,9 +1238,11 @@ pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let sub = _mm256_sub_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, sub, src.as_f32x8())) +pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let sub = _mm256_sub_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, sub, src.as_f32x8())) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1094,9 +1252,11 @@ pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let sub = _mm256_sub_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, sub, f32x8::ZERO)) +pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let sub = _mm256_sub_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, sub, f32x8::ZERO)) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1106,9 +1266,11 @@ pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let sub = _mm_sub_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, sub, src.as_f32x4())) +pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let sub = _mm_sub_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, sub, src.as_f32x4())) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1118,9 +1280,11 @@ pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps))] -pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let sub = _mm_sub_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, sub, f32x4::ZERO)) +pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let sub = _mm_sub_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, sub, f32x4::ZERO)) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
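Note that the 128- and 256-bit masked variants are gated on `avx512f,avx512vl`, so a caller has to enable or detect both features, not just AVX-512F. A hedged sketch, with invented helper names, of how that looks from user code:

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn low_two_lanes_diff(a: __m128, b: __m128) -> __m128 {
    // Mask 0b0011 keeps lanes 0 and 1 of `a - b` and zeroes lanes 2 and 3.
    _mm_maskz_sub_ps(0b0011, a, b)
}

fn caller(a: __m128, b: __m128) -> Option<__m128> {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        // SAFETY: both required features were just detected at run time.
        Some(unsafe { low_two_lanes_diff(a, b) })
    } else {
        None
    }
}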
@@ -1130,8 +1294,8 @@ pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) +pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1141,9 +1305,11 @@ pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let sub = _mm512_sub_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, sub, src.as_f64x8())) +pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let sub = _mm512_sub_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, sub, src.as_f64x8())) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1153,9 +1319,11 @@ pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let sub = _mm512_sub_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, sub, f64x8::ZERO)) +pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let sub = _mm512_sub_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, sub, f64x8::ZERO)) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1165,9 +1333,11 @@ pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let sub = _mm256_sub_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, sub, src.as_f64x4())) +pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let sub = _mm256_sub_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, sub, src.as_f64x4())) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1177,9 +1347,11 @@ pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let sub = _mm256_sub_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, sub, f64x4::ZERO)) +pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let sub = _mm256_sub_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, sub, f64x4::ZERO)) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1189,9 +1361,11 @@ pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let sub = _mm_sub_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, sub, src.as_f64x2())) +pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let sub = _mm_sub_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, sub, src.as_f64x2())) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1201,9 +1375,11 @@ pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd))] -pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let sub = _mm_sub_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, sub, f64x2::ZERO)) +pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let sub = _mm_sub_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, sub, f64x2::ZERO)) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst. @@ -1213,10 +1389,12 @@ pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i { - let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8())); - let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8())); - transmute(simd_mul(a, b)) +pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8())); + let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8())); + transmute(simd_mul(a, b)) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
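`_mm512_mul_epi32` (vpmuldq) reads only the low 32 bits of each 64-bit lane, sign-extends them, and keeps the full 64-bit product, which is what the nested `simd_cast`s in the body express. A scalar model of one lane, purely illustrative:

// Only the low 32 bits of each 64-bit input lane participate; they are
// sign-extended, and the full 64-bit product is kept, so nothing overflows.
fn mul_epi32_lane(a: i64, b: i64) -> i64 {
    (a as i32 as i64) * (b as i32 as i64)
}

// A low half of 0xFFFF_FFFF is read as -1 here, so the product with 3 is -3;
// the unsigned counterpart `_mm512_mul_epu32` would yield 3 * 4_294_967_295.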
@@ -1226,9 +1404,11 @@ pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mul_epi32(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, mul, src.as_i64x8())) +pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epi32(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, mul, src.as_i64x8())) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1238,9 +1418,11 @@ pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mul_epi32(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, mul, i64x8::ZERO)) +pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epi32(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, mul, i64x8::ZERO)) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1250,9 +1432,11 @@ pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mul_epi32(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, mul, src.as_i64x4())) +pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epi32(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, mul, src.as_i64x4())) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1262,9 +1446,11 @@ pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mul_epi32(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, mul, i64x4::ZERO)) +pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epi32(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, mul, i64x4::ZERO)) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1274,9 +1460,11 @@ pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mul_epi32(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, mul, src.as_i64x2())) +pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epi32(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, mul, src.as_i64x2())) + } } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1286,9 +1474,11 @@ pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] -pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mul_epi32(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, mul, i64x2::ZERO)) +pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epi32(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, mul, i64x2::ZERO)) + } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst. @@ -1298,8 +1488,8 @@ pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1309,14 +1499,11 @@ pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm512_mask_mullo_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let mul = _mm512_mullo_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, mul, src.as_i32x16())) +pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, mul, src.as_i32x16())) + } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1326,9 +1513,11 @@ pub unsafe fn _mm512_mask_mullo_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mullo_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, mul, i32x16::ZERO)) +pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, mul, i32x16::ZERO)) + } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1338,14 +1527,11 @@ pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm256_mask_mullo_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let mul = _mm256_mullo_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, mul, src.as_i32x8())) +pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, mul, src.as_i32x8())) + } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1355,9 +1541,11 @@ pub unsafe fn _mm256_mask_mullo_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mullo_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, mul, i32x8::ZERO)) +pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mullo_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, mul, i32x8::ZERO)) + } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1367,9 +1555,11 @@ pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mullo_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, mul, src.as_i32x4())) +pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, mul, src.as_i32x4())) + } } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
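`_mm512_mullo_epi32` keeps only the low 32 bits of each intermediate 64-bit product, i.e. it is wrapping multiplication lane by lane. Illustrative scalar model:

fn mullo_epi32_lane(a: i32, b: i32) -> i32 {
    a.wrapping_mul(b)
}

// e.g. mullo_epi32_lane(0x4000_0000, 4) == 0: the 0x1_0000_0000 bit is dropped.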
@@ -1379,9 +1569,11 @@ pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmulld))] -pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mullo_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, mul, i32x4::ZERO)) +pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mullo_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, mul, i32x4::ZERO)) + } } /// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst. @@ -1392,8 +1584,8 @@ pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m1 #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) } } /// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1404,14 +1596,11 @@ pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_mullox_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - let mul = _mm512_mullox_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, mul, src.as_i64x8())) +pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mullox_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, mul, src.as_i64x8())) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst. @@ -1421,11 +1610,13 @@ pub unsafe fn _mm512_mask_mullox_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u64x8(); - let b = b.as_u64x8(); - let mask = u64x8::splat(u32::MAX.into()); - transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) +pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let mask = u64x8::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
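Two different 64-bit products appear here: `_mm512_mul_epu32` zero-extends the low 32 bits of each lane (the `u64x8::splat(u32::MAX.into())` mask above is exactly that truncation), while `_mm512_mullox_epi64` multiplies whole 64-bit lanes and keeps the low 64 bits, for which AVX-512F has no single instruction (hence no `assert_instr` on that wrapper). Illustrative scalar models of one lane each:

// `_mm512_mul_epu32` (vpmuludq): unsigned low-32 x low-32 -> full 64-bit product.
fn mul_epu32_lane(a: u64, b: u64) -> u64 {
    (a & 0xFFFF_FFFF) * (b & 0xFFFF_FFFF)
}

// `_mm512_mullox_epi64`: full 64 x 64 multiply, keeping the low 64 bits.
fn mullox_epi64_lane(a: i64, b: i64) -> i64 {
    a.wrapping_mul(b)
}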
@@ -1435,9 +1626,11 @@ pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mul_epu32(a, b).as_u64x8(); - transmute(simd_select_bitmask(k, mul, src.as_u64x8())) +pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epu32(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, mul, src.as_u64x8())) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1447,9 +1640,11 @@ pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let mul = _mm512_mul_epu32(a, b).as_u64x8(); - transmute(simd_select_bitmask(k, mul, u64x8::ZERO)) +pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let mul = _mm512_mul_epu32(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, mul, u64x8::ZERO)) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1459,9 +1654,11 @@ pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mul_epu32(a, b).as_u64x4(); - transmute(simd_select_bitmask(k, mul, src.as_u64x4())) +pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epu32(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, mul, src.as_u64x4())) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1471,9 +1668,11 @@ pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let mul = _mm256_mul_epu32(a, b).as_u64x4(); - transmute(simd_select_bitmask(k, mul, u64x4::ZERO)) +pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let mul = _mm256_mul_epu32(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, mul, u64x4::ZERO)) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1483,9 +1682,11 @@ pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mul_epu32(a, b).as_u64x2(); - transmute(simd_select_bitmask(k, mul, src.as_u64x2())) +pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epu32(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, mul, src.as_u64x2())) + } } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1495,9 +1696,11 @@ pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] -pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let mul = _mm_mul_epu32(a, b).as_u64x2(); - transmute(simd_select_bitmask(k, mul, u64x2::ZERO)) +pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let mul = _mm_mul_epu32(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, mul, u64x2::ZERO)) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst. @@ -1507,8 +1710,8 @@ pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) +pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1518,9 +1721,11 @@ pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let mul = _mm512_mul_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, mul, src.as_f32x16())) +pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let mul = _mm512_mul_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, mul, src.as_f32x16())) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1530,9 +1735,11 @@ pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let mul = _mm512_mul_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, mul, f32x16::ZERO)) +pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let mul = _mm512_mul_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, mul, f32x16::ZERO)) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1542,9 +1749,11 @@ pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let mul = _mm256_mul_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, mul, src.as_f32x8())) +pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let mul = _mm256_mul_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, mul, src.as_f32x8())) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1554,9 +1763,11 @@ pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let mul = _mm256_mul_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, mul, f32x8::ZERO)) +pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let mul = _mm256_mul_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, mul, f32x8::ZERO)) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1566,9 +1777,11 @@ pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mul = _mm_mul_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, mul, src.as_f32x4())) +pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mul = _mm_mul_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, mul, src.as_f32x4())) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1578,9 +1791,11 @@ pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps))] -pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mul = _mm_mul_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, mul, f32x4::ZERO)) +pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mul = _mm_mul_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, mul, f32x4::ZERO)) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst. @@ -1590,8 +1805,8 @@ pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) +pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1601,9 +1816,11 @@ pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let mul = _mm512_mul_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, mul, src.as_f64x8())) +pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let mul = _mm512_mul_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, mul, src.as_f64x8())) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1613,9 +1830,11 @@ pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let mul = _mm512_mul_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, mul, f64x8::ZERO)) +pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let mul = _mm512_mul_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, mul, f64x8::ZERO)) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
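The mask arguments are ordinary integers (`__mmask8` is `u8`, `__mmask16` is `u16`), with bit i steering lane i. A small sketch, not part of stdarch, that packs a `[bool; 8]` into a mask and feeds it to `_mm512_maskz_mul_pd`:

use core::arch::x86_64::*;

fn pack_mask(keep: [bool; 8]) -> __mmask8 {
    keep.iter()
        .enumerate()
        .fold(0u8, |m, (i, &bit)| m | ((bit as u8) << i))
}

#[target_feature(enable = "avx512f")]
fn selective_product(keep: [bool; 8], a: __m512d, b: __m512d) -> __m512d {
    // Lanes whose bit is clear come out as +0.0 instead of a * b.
    _mm512_maskz_mul_pd(pack_mask(keep), a, b)
}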
@@ -1625,9 +1844,11 @@ pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let mul = _mm256_mul_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, mul, src.as_f64x4())) +pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let mul = _mm256_mul_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, mul, src.as_f64x4())) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1637,9 +1858,11 @@ pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let mul = _mm256_mul_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, mul, f64x4::ZERO)) +pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let mul = _mm256_mul_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, mul, f64x4::ZERO)) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1649,9 +1872,11 @@ pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mul = _mm_mul_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, mul, src.as_f64x2())) +pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mul = _mm_mul_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, mul, src.as_f64x2())) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1661,9 +1886,11 @@ pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd))] -pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mul = _mm_mul_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, mul, f64x2::ZERO)) +pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mul = _mm_mul_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, mul, f64x2::ZERO)) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst. 
@@ -1673,8 +1900,8 @@ pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 { - transmute(simd_div(a.as_f32x16(), b.as_f32x16())) +pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1684,9 +1911,11 @@ pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let div = _mm512_div_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, div, src.as_f32x16())) +pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let div = _mm512_div_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, div, src.as_f32x16())) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1696,9 +1925,11 @@ pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let div = _mm512_div_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, div, f32x16::ZERO)) +pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let div = _mm512_div_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, div, f32x16::ZERO)) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1708,9 +1939,11 @@ pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let div = _mm256_div_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, div, src.as_f32x8())) +pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let div = _mm256_div_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, div, src.as_f32x8())) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1720,9 +1953,11 @@ pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let div = _mm256_div_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, div, f32x8::ZERO)) +pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let div = _mm256_div_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, div, f32x8::ZERO)) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1732,9 +1967,11 @@ pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let div = _mm_div_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, div, src.as_f32x4())) +pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let div = _mm_div_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, div, src.as_f32x4())) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1744,9 +1981,11 @@ pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps))] -pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let div = _mm_div_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, div, f32x4::ZERO)) +pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let div = _mm_div_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, div, f32x4::ZERO)) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst. @@ -1756,8 +1995,8 @@ pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(simd_div(a.as_f64x8(), b.as_f64x8())) +pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
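One practical use of the zero-masked divide is suppressing the lanes whose divisor is zero. The sketch below is illustrative, not part of this patch, and assumes that `_mm512_cmpneq_ps_mask` and `_mm512_setzero_ps` from the same module are available and likewise callable from an `avx512f` context.

use core::arch::x86_64::*;

/// Divides `a` by `b` lane-wise, forcing +0.0 in every lane where `b` is zero
/// instead of producing an infinity or NaN.
#[target_feature(enable = "avx512f")]
fn div_or_zero(a: __m512, b: __m512) -> __m512 {
    let nonzero = _mm512_cmpneq_ps_mask(b, _mm512_setzero_ps());
    _mm512_maskz_div_ps(nonzero, a, b)
}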
@@ -1767,9 +2006,11 @@ pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let div = _mm512_div_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, div, src.as_f64x8())) +pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let div = _mm512_div_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, div, src.as_f64x8())) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1779,9 +2020,11 @@ pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let div = _mm512_div_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, div, f64x8::ZERO)) +pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let div = _mm512_div_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, div, f64x8::ZERO)) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1791,9 +2034,11 @@ pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let div = _mm256_div_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, div, src.as_f64x4())) +pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let div = _mm256_div_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, div, src.as_f64x4())) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1803,9 +2048,11 @@ pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let div = _mm256_div_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, div, f64x4::ZERO)) +pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let div = _mm256_div_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, div, f64x4::ZERO)) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1815,9 +2062,11 @@ pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let div = _mm_div_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, div, src.as_f64x2())) +pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let div = _mm_div_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, div, src.as_f64x2())) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1827,9 +2076,11 @@ pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd))] -pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let div = _mm_div_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, div, f64x2::ZERO)) +pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let div = _mm_div_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, div, f64x2::ZERO)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst. @@ -1839,10 +2090,12 @@ pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i32x16(); - let b = b.as_i32x16(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let b = b.as_i32x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1852,9 +2105,11 @@ pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, max, src.as_i32x16())) +pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, max, src.as_i32x16())) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1864,9 +2119,11 @@ pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, max, i32x16::ZERO)) +pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, max, i32x16::ZERO)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1876,9 +2133,11 @@ pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, max, src.as_i32x8())) +pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, max, src.as_i32x8())) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1888,9 +2147,11 @@ pub unsafe fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, max, i32x8::ZERO)) +pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, max, i32x8::ZERO)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1900,9 +2161,11 @@ pub unsafe fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, max, src.as_i32x4())) +pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, max, src.as_i32x4())) + } } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -1912,9 +2175,11 @@ pub unsafe fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] -pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, max, i32x4::ZERO)) +pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, max, i32x4::ZERO)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst. @@ -1924,10 +2189,12 @@ pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i64x8(); - let b = b.as_i64x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i64x8(); + let b = b.as_i64x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1937,9 +2204,11 @@ pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, max, src.as_i64x8())) +pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, max, src.as_i64x8())) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1949,9 +2218,11 @@ pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, max, i64x8::ZERO)) +pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, max, i64x8::ZERO)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst. 
@@ -1961,10 +2232,12 @@ pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i64x4(); - let b = b.as_i64x4(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i64x4(); + let b = b.as_i64x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1974,9 +2247,11 @@ pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, max, src.as_i64x4())) +pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, max, src.as_i64x4())) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1986,9 +2261,11 @@ pub unsafe fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, max, i64x4::ZERO)) +pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, max, i64x4::ZERO)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst. @@ -1998,10 +2275,12 @@ pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i64x2(); - let b = b.as_i64x2(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i64x2(); + let b = b.as_i64x2(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
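// Illustrative sketch (not part of this diff): the 64-bit signed max above is a plain
// compare-and-select per lane (simd_gt produces a lane mask, simd_select keeps `a`
// where the mask is set). A hypothetical 2-lane scalar model mirroring _mm_max_epi64:
fn max_epi64_model(a: [i64; 2], b: [i64; 2]) -> [i64; 2] {
    let mut dst = [0i64; 2];
    for i in 0..2 {
        dst[i] = if a[i] > b[i] { a[i] } else { b[i] };
    }
    dst
}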
@@ -2011,9 +2290,11 @@ pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, max, src.as_i64x2())) +pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, max, src.as_i64x2())) + } } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2023,9 +2304,11 @@ pub unsafe fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] -pub unsafe fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, max, i64x2::ZERO)) +pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, max, i64x2::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst. @@ -2035,12 +2318,14 @@ pub unsafe fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 { - transmute(vmaxps( - a.as_f32x16(), - b.as_f32x16(), - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vmaxps( + a.as_f32x16(), + b.as_f32x16(), + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2050,9 +2335,11 @@ pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let max = _mm512_max_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, max, src.as_f32x16())) +pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let max = _mm512_max_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, max, src.as_f32x16())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2062,9 +2349,11 @@ pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let max = _mm512_max_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, max, f32x16::ZERO)) +pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let max = _mm512_max_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, max, f32x16::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2074,9 +2363,11 @@ pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let max = _mm256_max_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, max, src.as_f32x8())) +pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let max = _mm256_max_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, max, src.as_f32x8())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2086,9 +2377,11 @@ pub unsafe fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let max = _mm256_max_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, max, f32x8::ZERO)) +pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let max = _mm256_max_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, max, f32x8::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2098,9 +2391,11 @@ pub unsafe fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let max = _mm_max_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, max, src.as_f32x4())) +pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let max = _mm_max_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, max, src.as_f32x4())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2110,9 +2405,11 @@ pub unsafe fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps))] -pub unsafe fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let max = _mm_max_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, max, f32x4::ZERO)) +pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let max = _mm_max_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, max, f32x4::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst. @@ -2122,8 +2419,8 @@ pub unsafe fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2133,9 +2430,11 @@ pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let max = _mm512_max_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, max, src.as_f64x8())) +pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let max = _mm512_max_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, max, src.as_f64x8())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2145,9 +2444,11 @@ pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let max = _mm512_max_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, max, f64x8::ZERO)) +pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let max = _mm512_max_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, max, f64x8::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2157,9 +2458,11 @@ pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let max = _mm256_max_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, max, src.as_f64x4())) +pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let max = _mm256_max_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, max, src.as_f64x4())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2169,9 +2472,11 @@ pub unsafe fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let max = _mm256_max_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, max, f64x4::ZERO)) +pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let max = _mm256_max_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, max, f64x4::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2181,9 +2486,11 @@ pub unsafe fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let max = _mm_max_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, max, src.as_f64x2())) +pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let max = _mm_max_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, max, src.as_f64x2())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2193,9 +2500,11 @@ pub unsafe fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd))] -pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let max = _mm_max_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, max, f64x2::ZERO)) +pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let max = _mm_max_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, max, f64x2::ZERO)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst. 
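// Illustrative sketch (not part of this diff): unlike the integer variants, the float
// max/min here lower to vmaxps/vmaxpd, whose lane semantics are "return the second
// operand unless the first compares strictly greater". In particular a NaN in either
// operand yields the second operand, which differs from f64::max. A scalar model of
// one vmaxpd lane, assuming the documented x86 behavior:
fn vmaxpd_lane_model(a: f64, b: f64) -> f64 {
    // NaN comparisons are false, so any NaN input (and the +0.0 / -0.0 tie) falls
    // through to `b`.
    if a > b { a } else { b }
}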
@@ -2205,10 +2514,12 @@ pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u32x16(); - let b = b.as_u32x16(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u32x16(); + let b = b.as_u32x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2218,9 +2529,11 @@ pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu32(a, b).as_u32x16(); - transmute(simd_select_bitmask(k, max, src.as_u32x16())) +pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, max, src.as_u32x16())) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2230,9 +2543,11 @@ pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu32(a, b).as_u32x16(); - transmute(simd_select_bitmask(k, max, u32x16::ZERO)) +pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, max, u32x16::ZERO)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2242,9 +2557,11 @@ pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu32(a, b).as_u32x8(); - transmute(simd_select_bitmask(k, max, src.as_u32x8())) +pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, max, src.as_u32x8())) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2254,9 +2571,11 @@ pub unsafe fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu32(a, b).as_u32x8(); - transmute(simd_select_bitmask(k, max, u32x8::ZERO)) +pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, max, u32x8::ZERO)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2266,9 +2585,11 @@ pub unsafe fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu32(a, b).as_u32x4(); - transmute(simd_select_bitmask(k, max, src.as_u32x4())) +pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu32(a, b).as_u32x4(); + transmute(simd_select_bitmask(k, max, src.as_u32x4())) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2278,9 +2599,11 @@ pub unsafe fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] -pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu32(a, b).as_u32x4(); - transmute(simd_select_bitmask(k, max, u32x4::ZERO)) +pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu32(a, b).as_u32x4(); + transmute(simd_select_bitmask(k, max, u32x4::ZERO)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst. @@ -2290,10 +2613,12 @@ pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u64x8(); - let b = b.as_u64x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u64x8(); + let b = b.as_u64x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2303,9 +2628,11 @@ pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu64(a, b).as_u64x8(); - transmute(simd_select_bitmask(k, max, src.as_u64x8())) +pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, max, src.as_u64x8())) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2315,9 +2642,11 @@ pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let max = _mm512_max_epu64(a, b).as_u64x8(); - transmute(simd_select_bitmask(k, max, u64x8::ZERO)) +pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = _mm512_max_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, max, u64x8::ZERO)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst. @@ -2327,10 +2656,12 @@ pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u64x4(); - let b = b.as_u64x4(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u64x4(); + let b = b.as_u64x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2340,9 +2671,11 @@ pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu64(a, b).as_u64x4(); - transmute(simd_select_bitmask(k, max, src.as_u64x4())) +pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu64(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, max, src.as_u64x4())) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2352,9 +2685,11 @@ pub unsafe fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let max = _mm256_max_epu64(a, b).as_u64x4(); - transmute(simd_select_bitmask(k, max, u64x4::ZERO)) +pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = _mm256_max_epu64(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, max, u64x4::ZERO)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst. @@ -2364,10 +2699,12 @@ pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u64x2(); - let b = b.as_u64x2(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u64x2(); + let b = b.as_u64x2(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2377,9 +2714,11 @@ pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu64(a, b).as_u64x2(); - transmute(simd_select_bitmask(k, max, src.as_u64x2())) +pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu64(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, max, src.as_u64x2())) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2389,9 +2728,11 @@ pub unsafe fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] -pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let max = _mm_max_epu64(a, b).as_u64x2(); - transmute(simd_select_bitmask(k, max, u64x2::ZERO)) +pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = _mm_max_epu64(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, max, u64x2::ZERO)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst. 
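// Illustrative sketch (not part of this diff): the _epu* variants above differ from
// the _epi* ones only in reinterpreting each lane as unsigned before the compare,
// which reorders values with the top bit set; the same applies to the 64-bit forms.
// A hypothetical scalar model of one 32-bit lane:
fn max_epu32_lane(a: i32, b: i32) -> i32 {
    // e.g. a = -1 (0xFFFF_FFFF) beats b = 1 here, while the signed max picks 1.
    let (ua, ub) = (a as u32, b as u32);
    (if ua > ub { ua } else { ub }) as i32
}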
@@ -2401,10 +2742,12 @@ pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i32x16(); - let b = b.as_i32x16(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let b = b.as_i32x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2414,9 +2757,11 @@ pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, min, src.as_i32x16())) +pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, min, src.as_i32x16())) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2426,9 +2771,11 @@ pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, min, i32x16::ZERO)) +pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, min, i32x16::ZERO)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2438,9 +2785,11 @@ pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, min, src.as_i32x8())) +pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, min, src.as_i32x8())) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2450,9 +2799,11 @@ pub unsafe fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, min, i32x8::ZERO)) +pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, min, i32x8::ZERO)) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2462,9 +2813,11 @@ pub unsafe fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, min, src.as_i32x4())) +pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, min, src.as_i32x4())) + } } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2474,9 +2827,11 @@ pub unsafe fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] -pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, min, i32x4::ZERO)) +pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, min, i32x4::ZERO)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. @@ -2486,10 +2841,12 @@ pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i64x8(); - let b = b.as_i64x8(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i64x8(); + let b = b.as_i64x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2499,9 +2856,11 @@ pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, min, src.as_i64x8())) +pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, min, src.as_i64x8())) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2511,9 +2870,11 @@ pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, min, i64x8::ZERO)) +pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, min, i64x8::ZERO)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. @@ -2523,10 +2884,12 @@ pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_i64x4(); - let b = b.as_i64x4(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_i64x4(); + let b = b.as_i64x4(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2536,9 +2899,11 @@ pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, min, src.as_i64x4())) +pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, min, src.as_i64x4())) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2548,9 +2913,11 @@ pub unsafe fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, min, i64x4::ZERO)) +pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, min, i64x4::ZERO)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. @@ -2560,10 +2927,12 @@ pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i64x2(); - let b = b.as_i64x2(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i64x2(); + let b = b.as_i64x2(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2573,9 +2942,11 @@ pub unsafe fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, min, src.as_i64x2())) +pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, min, src.as_i64x2())) + } } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2585,9 +2956,11 @@ pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] -pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, min, i64x2::ZERO)) +pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, min, i64x2::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst. 
@@ -2597,12 +2970,14 @@ pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 { - transmute(vminps( - a.as_f32x16(), - b.as_f32x16(), - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vminps( + a.as_f32x16(), + b.as_f32x16(), + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2612,9 +2987,11 @@ pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let min = _mm512_min_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, min, src.as_f32x16())) +pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let min = _mm512_min_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, min, src.as_f32x16())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2624,9 +3001,11 @@ pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let min = _mm512_min_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, min, f32x16::ZERO)) +pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let min = _mm512_min_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, min, f32x16::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2636,9 +3015,11 @@ pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let min = _mm256_min_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, min, src.as_f32x8())) +pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let min = _mm256_min_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, min, src.as_f32x8())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2648,9 +3029,11 @@ pub unsafe fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let min = _mm256_min_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, min, f32x8::ZERO)) +pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let min = _mm256_min_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, min, f32x8::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2660,9 +3043,11 @@ pub unsafe fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let min = _mm_min_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, min, src.as_f32x4())) +pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let min = _mm_min_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, min, src.as_f32x4())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2672,9 +3057,11 @@ pub unsafe fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps))] -pub unsafe fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let min = _mm_min_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, min, f32x4::ZERO)) +pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let min = _mm_min_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, min, f32x4::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst. @@ -2684,8 +3071,8 @@ pub unsafe fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) +pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2695,9 +3082,11 @@ pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let min = _mm512_min_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, min, src.as_f64x8())) +pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let min = _mm512_min_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, min, src.as_f64x8())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2707,9 +3096,11 @@ pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let min = _mm512_min_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, min, f64x8::ZERO)) +pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let min = _mm512_min_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, min, f64x8::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2719,9 +3110,11 @@ pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let min = _mm256_min_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, min, src.as_f64x4())) +pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let min = _mm256_min_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, min, src.as_f64x4())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2731,9 +3124,11 @@ pub unsafe fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let min = _mm256_min_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, min, f64x4::ZERO)) +pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let min = _mm256_min_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, min, f64x4::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2743,9 +3138,11 @@ pub unsafe fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let min = _mm_min_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, min, src.as_f64x2())) +pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let min = _mm_min_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, min, src.as_f64x2())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2755,9 +3152,11 @@ pub unsafe fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd))] -pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let min = _mm_min_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, min, f64x2::ZERO)) +pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let min = _mm_min_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, min, f64x2::ZERO)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst. @@ -2767,10 +3166,12 @@ pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u32x16(); - let b = b.as_u32x16(); - transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b)) +pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u32x16(); + let b = b.as_u32x16(); + transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2780,9 +3181,11 @@ pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu32(a, b).as_u32x16(); - transmute(simd_select_bitmask(k, min, src.as_u32x16())) +pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, min, src.as_u32x16())) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -2792,9 +3195,11 @@ pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu32(a, b).as_u32x16(); - transmute(simd_select_bitmask(k, min, u32x16::ZERO)) +pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu32(a, b).as_u32x16(); + transmute(simd_select_bitmask(k, min, u32x16::ZERO)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2804,9 +3209,11 @@ pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu32(a, b).as_u32x8(); - transmute(simd_select_bitmask(k, min, src.as_u32x8())) +pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, min, src.as_u32x8())) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2816,9 +3223,11 @@ pub unsafe fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu32(a, b).as_u32x8(); - transmute(simd_select_bitmask(k, min, u32x8::ZERO)) +pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu32(a, b).as_u32x8(); + transmute(simd_select_bitmask(k, min, u32x8::ZERO)) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2828,9 +3237,11 @@ pub unsafe fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu32(a, b).as_u32x4(); - transmute(simd_select_bitmask(k, min, src.as_u32x4())) +pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu32(a, b).as_u32x4(); + transmute(simd_select_bitmask(k, min, src.as_u32x4())) + } } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -2840,9 +3251,11 @@ pub unsafe fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] -pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu32(a, b).as_u32x4(); - transmute(simd_select_bitmask(k, min, u32x4::ZERO)) +pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu32(a, b).as_u32x4(); + transmute(simd_select_bitmask(k, min, u32x4::ZERO)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. @@ -2852,10 +3265,12 @@ pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_u64x8(); - let b = b.as_u64x8(); - transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b)) +pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_u64x8(); + let b = b.as_u64x8(); + transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2865,9 +3280,11 @@ pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu64(a, b).as_u64x8(); - transmute(simd_select_bitmask(k, min, src.as_u64x8())) +pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, min, src.as_u64x8())) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2877,9 +3294,11 @@ pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let min = _mm512_min_epu64(a, b).as_u64x8(); - transmute(simd_select_bitmask(k, min, u64x8::ZERO)) +pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let min = _mm512_min_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, min, u64x8::ZERO)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
@@ -2889,10 +3308,12 @@ pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i { - let a = a.as_u64x4(); - let b = b.as_u64x4(); - transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b)) +pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let a = a.as_u64x4(); + let b = b.as_u64x4(); + transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2902,9 +3323,11 @@ pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu64(a, b).as_u64x4(); - transmute(simd_select_bitmask(k, min, src.as_u64x4())) +pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu64(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, min, src.as_u64x4())) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2914,9 +3337,11 @@ pub unsafe fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let min = _mm256_min_epu64(a, b).as_u64x4(); - transmute(simd_select_bitmask(k, min, u64x4::ZERO)) +pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let min = _mm256_min_epu64(a, b).as_u64x4(); + transmute(simd_select_bitmask(k, min, u64x4::ZERO)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. @@ -2926,10 +3351,12 @@ pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u64x2(); - let b = b.as_u64x2(); - transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b)) +pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u64x2(); + let b = b.as_u64x2(); + transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b)) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2939,9 +3366,11 @@ pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu64(a, b).as_u64x2(); - transmute(simd_select_bitmask(k, min, src.as_u64x2())) +pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu64(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, min, src.as_u64x2())) + } } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -2951,9 +3380,11 @@ pub unsafe fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] -pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let min = _mm_min_epu64(a, b).as_u64x2(); - transmute(simd_select_bitmask(k, min, u64x2::ZERO)) +pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let min = _mm_min_epu64(a, b).as_u64x2(); + transmute(simd_select_bitmask(k, min, u64x2::ZERO)) + } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. @@ -2963,8 +3394,8 @@ pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 { - simd_fsqrt(a) +pub fn _mm512_sqrt_ps(a: __m512) -> __m512 { + unsafe { simd_fsqrt(a) } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2974,8 +3405,8 @@ pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - simd_select_bitmask(k, simd_fsqrt(a), src) +pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
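Every mask_*/maskz_* body in these hunks reduces to the same per-lane select: simd_select_bitmask(k, op, fallback) keeps lane i of the computed result when bit i of k is set, and otherwise takes lane i of the fallback (src for the writemask forms, zero for the zeromask forms). A minimal scalar sketch of that behaviour, using a hypothetical helper rather than the real compiler intrinsic:

// Scalar model of the writemask/zeromask select (illustrative only; the
// 4-lane width and the helper name are arbitrary).
fn select_bitmask_4(k: u8, op: [i64; 4], fallback: [i64; 4]) -> [i64; 4] {
    let mut dst = [0i64; 4];
    for i in 0..4 {
        // Bit i of the mask picks between the computed lane and the fallback lane.
        dst[i] = if (k >> i) & 1 == 1 { op[i] } else { fallback[i] };
    }
    dst
}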
@@ -2985,8 +3416,8 @@ pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 { - simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) +pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2996,8 +3427,8 @@ pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - simd_select_bitmask(k, simd_fsqrt(a), src) +pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3007,8 +3438,8 @@ pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 { - simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) +pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -3018,8 +3449,8 @@ pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - simd_select_bitmask(k, simd_fsqrt(a), src) +pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3029,8 +3460,8 @@ pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps))] -pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 { - simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) +pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
@@ -3040,8 +3471,8 @@ pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d { - simd_fsqrt(a) +pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d { + unsafe { simd_fsqrt(a) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -3051,8 +3482,8 @@ pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - simd_select_bitmask(k, simd_fsqrt(a), src) +pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3062,8 +3493,8 @@ pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d { - simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) +pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -3073,8 +3504,8 @@ pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - simd_select_bitmask(k, simd_fsqrt(a), src) +pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3084,8 +3515,8 @@ pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d { - simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) +pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -3095,8 +3526,8 @@ pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - simd_select_bitmask(k, simd_fsqrt(a), src) +pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3106,8 +3537,8 @@ pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd))] -pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d { - simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) +pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst. @@ -3117,8 +3548,8 @@ pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { - simd_fma(a, b, c) +pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(a, b, c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3128,8 +3559,8 @@ pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) +pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3139,8 +3570,8 @@ pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) +pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3150,8 +3581,8 @@ pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { - simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) +pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3161,8 +3592,8 @@ pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) +pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3172,8 +3603,8 @@ pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) +pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
@@ -3183,8 +3614,8 @@ pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { - simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) +pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3194,8 +3625,8 @@ pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) +pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3205,8 +3636,8 @@ pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) +pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3216,8 +3647,8 @@ pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps -pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) +pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst. 
@@ -3227,8 +3658,8 @@ pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_fma(a, b, c) +pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(a, b, c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3238,8 +3669,8 @@ pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) +pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3249,8 +3680,8 @@ pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) +pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3260,8 +3691,8 @@ pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { - simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) +pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -3271,8 +3702,8 @@ pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) +pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3282,8 +3713,8 @@ pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) +pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3293,8 +3724,8 @@ pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { - simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) +pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3304,8 +3735,8 @@ pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) +pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3315,8 +3746,8 @@ pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) +pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3326,8 +3757,8 @@ pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd -pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) +pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst. @@ -3337,8 +3768,8 @@ pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { - simd_fma(a, b, simd_neg(c)) +pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3348,8 +3779,8 @@ pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) +pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
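The fmsub bodies lean on the identity fmsub(a, b, c) = fma(a, b, -c): negating c lets the same fused multiply-add lowering compute a*b - c with a single rounding, which is why the assert_instr comments note that either the vfmadd or the vfmsub instruction form may be emitted. A per-lane sketch of the identity using the standard library's mul_add rather than the internal simd_fma:

// fmsub expressed through fused multiply-add, one lane at a time (sketch only).
fn fmsub_lane(a: f64, b: f64, c: f64) -> f64 {
    // a * b - c with a single rounding, matching simd_fma(a, b, simd_neg(c)).
    f64::mul_add(a, b, -c)
}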
@@ -3359,8 +3790,8 @@ pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) +pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3370,8 +3801,8 @@ pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { - simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) +pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3381,8 +3812,8 @@ pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) +pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3392,8 +3823,8 @@ pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) +pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3403,8 +3834,8 @@ pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { - simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) +pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3414,8 +3845,8 @@ pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) +pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3425,8 +3856,8 @@ pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) +pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3436,8 +3867,8 @@ pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub -pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) +pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst. @@ -3447,8 +3878,8 @@ pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_fma(a, b, simd_neg(c)) +pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3458,8 +3889,8 @@ pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) +pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3469,8 +3900,8 @@ pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) +pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3480,8 +3911,8 @@ pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { - simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) +pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3491,8 +3922,8 @@ pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) +pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3502,8 +3933,8 @@ pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. 
clang fmadd, gcc fmsub -pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) +pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3513,8 +3944,8 @@ pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { - simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) +pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3524,8 +3955,8 @@ pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) +pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3535,8 +3966,8 @@ pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) +pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
@@ -3546,8 +3977,8 @@ pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub -pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) +pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst. @@ -3557,14 +3988,16 @@ pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!( - add, - sub, - [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15] - ) +pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!( + add, + sub, + [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15] + ) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3574,8 +4007,8 @@ pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) +pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
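The `simd_shuffle!` index list in `_mm512_fmaddsub_ps` is what produces the add/subtract interleave: indices below 16 take lanes from `add` and indices 16 and up take lanes from `sub`, so even result lanes end up as a*b - c and odd lanes as a*b + c. A scalar sketch of the same lane pattern (plain arrays; `fmaddsub_lanes` is an illustrative name, not a crate API):

// Scalar model of the fmaddsub lane pattern selected by the shuffle above:
// even lanes compute a*b - c, odd lanes compute a*b + c.
fn fmaddsub_lanes(a: &[f32], b: &[f32], c: &[f32]) -> Vec<f32> {
    a.iter()
        .zip(b)
        .zip(c)
        .enumerate()
        .map(|(i, ((&x, &y), &z))| {
            if i % 2 == 0 {
                x.mul_add(y, -z) // even lane: subtract c
            } else {
                x.mul_add(y, z) // odd lane: add c
            }
        })
        .collect()
}

fn main() {
    let out = fmaddsub_lanes(&[1.0; 4], &[10.0; 4], &[1.0, 2.0, 3.0, 4.0]);
    assert_eq!(out, [9.0, 12.0, 7.0, 14.0]); // [10-1, 10+2, 10-3, 10+4]
    println!("{out:?}");
}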
@@ -3585,8 +4018,8 @@ pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) +pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3596,8 +4029,8 @@ pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { - simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) +pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3607,8 +4040,8 @@ pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) +pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3618,8 +4051,8 @@ pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) +pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3629,8 +4062,8 @@ pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { - simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) +pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3640,8 +4073,8 @@ pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) +pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -3651,8 +4084,8 @@ pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) +pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3662,8 +4095,8 @@ pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps -pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) +pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst. @@ -3673,10 +4106,12 @@ pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) +pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -3686,8 +4121,8 @@ pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) +pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3697,8 +4132,8 @@ pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) +pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3708,8 +4143,8 @@ pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { - simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) +pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -3719,8 +4154,8 @@ pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) +pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3730,8 +4165,8 @@ pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) +pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3741,8 +4176,8 @@ pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { - simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) +pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -3752,8 +4187,8 @@ pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) +pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3763,8 +4198,8 @@ pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) +pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3774,8 +4209,8 @@ pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd -pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) +pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst. 
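For callers, the visible effect of dropping `unsafe` from these signatures is that the intrinsics can be called without an `unsafe` block from functions that statically enable the same target features; a runtime-dispatch caller still goes through `unsafe`, because the compiler cannot see the dynamic check. A minimal caller sketch using the stable 256-bit counterpart `_mm256_fmaddsub_ps` (assuming the `fma` intrinsics receive the same treatment elsewhere in this series; `fmaddsub_f32x8` is an illustrative wrapper, not crate code):

// Caller-side sketch (x86_64 only). Whether `_mm256_fmaddsub_ps` is an
// `unsafe fn` or a safe `#[target_feature]` fn after this change, a
// runtime-dispatch caller still wraps the call in `unsafe`, justified by
// the dynamic feature check.
#[cfg(target_arch = "x86_64")]
fn fmaddsub_f32x8(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> Option<[f32; 8]> {
    use std::arch::x86_64::{_mm256_fmaddsub_ps, _mm256_loadu_ps, _mm256_storeu_ps};
    if !(is_x86_feature_detected!("avx") && is_x86_feature_detected!("fma")) {
        return None; // a real caller would take a scalar fallback path
    }
    // SAFETY: "avx" and "fma" were verified at runtime just above.
    unsafe {
        let (va, vb, vc) = (
            _mm256_loadu_ps(a.as_ptr()),
            _mm256_loadu_ps(b.as_ptr()),
            _mm256_loadu_ps(c.as_ptr()),
        );
        let mut out = [0.0f32; 8];
        _mm256_storeu_ps(out.as_mut_ptr(), _mm256_fmaddsub_ps(va, vb, vc));
        Some(out)
    }
}

#[cfg(target_arch = "x86_64")]
fn main() {
    let c = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    // even lanes: 10 - c, odd lanes: 10 + c
    println!("{:?}", fmaddsub_f32x8([1.0; 8], [10.0; 8], c));
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}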
@@ -3785,14 +4220,16 @@ pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mma #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!( - add, - sub, - [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31] - ) +pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!( + add, + sub, + [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31] + ) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3802,8 +4239,8 @@ pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) +pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3813,8 +4250,8 @@ pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) +pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
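`fmsubadd` is the mirror interleave: the shuffle `[0, 17, 2, 19, ...]` keeps even lanes from `add` and odd lanes from `sub`, so even lanes compute a*b + c and odd lanes a*b - c, the opposite of `fmaddsub`. A tiny worked comparison with a = 1, b = 10 and c = [1, 2, 3, 4], done in plain scalar arithmetic:

// With a*b = 10 in every lane, the two interleaves differ lane by lane:
//   fmaddsub: even lanes a*b - c, odd lanes a*b + c
//   fmsubadd: even lanes a*b + c, odd lanes a*b - c
fn main() {
    let c = [1.0f64, 2.0, 3.0, 4.0];
    let fmaddsub: Vec<f64> = c
        .iter()
        .enumerate()
        .map(|(i, &ci)| if i % 2 == 0 { 10.0 - ci } else { 10.0 + ci })
        .collect();
    let fmsubadd: Vec<f64> = c
        .iter()
        .enumerate()
        .map(|(i, &ci)| if i % 2 == 0 { 10.0 + ci } else { 10.0 - ci })
        .collect();
    assert_eq!(fmaddsub, [9.0, 12.0, 7.0, 14.0]);
    assert_eq!(fmsubadd, [11.0, 8.0, 13.0, 6.0]);
}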
@@ -3824,8 +4261,8 @@ pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { - simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) +pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3835,8 +4272,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) +pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3846,8 +4283,8 @@ pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) +pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
@@ -3857,8 +4294,8 @@ pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { - simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) +pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3868,8 +4305,8 @@ pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mma #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) +pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3879,8 +4316,8 @@ pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) +pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -3890,8 +4327,8 @@ pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps -pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) +pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst. 
@@ -3901,10 +4338,12 @@ pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) +pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3914,8 +4353,8 @@ pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) +pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3925,8 +4364,8 @@ pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) +pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
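In every one of these `simd_shuffle!` calls the index vector selects from the concatenation of the two operands: for the 8-lane `__m512d` case, indices 0-7 refer to `add` and 8-15 to `sub`, which is where the `[0, 9, 2, 11, 4, 13, 6, 15]` pattern above comes from. A small sketch that derives such an index list for any lane count (`interleave_indices` is an illustrative helper, not a crate API):

// For a vector of `lanes` elements, simd_shuffle! indices 0..lanes pick from
// the first operand (`add`) and lanes..2*lanes pick from the second (`sub`).
// fmsubadd keeps even lanes from `add` and odd lanes from `sub`; fmaddsub is
// the reverse.
fn interleave_indices(lanes: usize, even_from_second: bool) -> Vec<usize> {
    (0..lanes)
        .map(|i| {
            let from_second = (i % 2 == 0) == even_from_second;
            if from_second { i + lanes } else { i }
        })
        .collect()
}

fn main() {
    // fmaddsub on 8 x f64: even lanes come from `sub` (the second operand).
    assert_eq!(interleave_indices(8, true), [8, 1, 10, 3, 12, 5, 14, 7]);
    // fmsubadd on 8 x f64: odd lanes come from `sub`.
    assert_eq!(interleave_indices(8, false), [0, 9, 2, 11, 4, 13, 6, 15]);
    println!("ok");
}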
@@ -3936,8 +4375,8 @@ pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { - simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) +pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3947,8 +4386,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) +pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3958,8 +4397,8 @@ pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) +pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
@@ -3969,8 +4408,8 @@ pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { - simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) +pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -3980,8 +4419,8 @@ pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) +pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -3991,8 +4430,8 @@ pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) +pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4002,8 +4441,8 @@ pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd -pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) +pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst. 
@@ -4013,8 +4452,8 @@ pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mma #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { - simd_fma(simd_neg(a), b, c) +pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4024,8 +4463,8 @@ pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) +pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4035,8 +4474,8 @@ pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) +pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4046,8 +4485,8 @@ pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { - simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) +pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
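`fnmadd` negates the product, not the whole expression: per lane the result is -(a*b) + c, computed with a single rounding. A scalar check of that identity using `f64::mul_add` (illustrative only; the inputs are chosen so the two-step c - a*b is also exact):

// fnmadd computes -(a*b) + c in one fused operation per lane; negating a
// product is exact in IEEE arithmetic, so (-a)*b + c is the same value.
fn fnmadd(a: f64, b: f64, c: f64) -> f64 {
    (-a).mul_add(b, c) // -(a*b) + c with a single rounding
}

fn main() {
    let (a, b, c) = (3.0, 4.0, 5.0);
    assert_eq!(fnmadd(a, b, c), c - a * b); // 5 - 12 = -7, exact for these inputs
    println!("{}", fnmadd(a, b, c));
}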
@@ -4057,8 +4496,8 @@ pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) +pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4068,8 +4507,8 @@ pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) +pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4079,8 +4518,8 @@ pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { - simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) +pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4090,8 +4529,8 @@ pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) +pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4101,8 +4540,8 @@ pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) +pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4112,8 +4551,8 @@ pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps -pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) +pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst. @@ -4123,8 +4562,8 @@ pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_fma(simd_neg(a), b, c) +pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4134,8 +4573,8 @@ pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) +pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4145,8 +4584,8 @@ pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) +pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4156,8 +4595,8 @@ pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { - simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) +pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4167,8 +4606,8 @@ pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mm #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) +pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4178,8 +4617,8 @@ pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) +pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4189,8 +4628,8 @@ pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { - simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) +pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4200,8 +4639,8 @@ pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mm #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) +pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4211,8 +4650,8 @@ pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) +pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
@@ -4222,8 +4661,8 @@ pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd -pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) +pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst. @@ -4233,8 +4672,8 @@ pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4244,8 +4683,8 @@ pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) +pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4255,8 +4694,8 @@ pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { - simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) +pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). 
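The masked FMA variants above all share one convention, spelled out in the writemask/zeromask/mask3 doc comments: inactive lanes fall back to `a`, to zero, or to `c` respectively. The following sketch is not part of this patch; it is a hypothetical usage example that assumes the safe signatures this series introduces (built on nightly while the `stdarch_x86_avx512` attribute above is still unstable), and `masked_fnmadd_demo` is an invented name:

#![feature(stdarch_x86_avx512)] // needed while these intrinsics carry the unstable attribute shown above
use std::arch::x86_64::*;

// Lane 1 (mask bit set) computes -(a*b)+c = -(4*2)+9 = 1.0 in every variant;
// lane 0 (mask bit clear) shows the three different fallbacks.
#[target_feature(enable = "avx512f,avx512vl")]
fn masked_fnmadd_demo() -> ([f64; 2], [f64; 2], [f64; 2]) {
    let a = _mm_set_pd(4.0, 1.0); // lanes (0, 1) = (1.0, 4.0)
    let b = _mm_set_pd(2.0, 2.0);
    let c = _mm_set_pd(9.0, 9.0);

    let mask = _mm_mask_fnmadd_pd(a, 0b10, b, c); // lane 0 copied from `a`
    let maskz = _mm_maskz_fnmadd_pd(0b10, a, b, c); // lane 0 zeroed
    let mask3 = _mm_mask3_fnmadd_pd(a, b, c, 0b10); // lane 0 copied from `c`

    let (mut x, mut y, mut z) = ([0.0; 2], [0.0; 2], [0.0; 2]);
    // Stores still dereference raw pointers, so they remain `unsafe` after this change.
    unsafe {
        _mm_storeu_pd(x.as_mut_ptr(), mask);
        _mm_storeu_pd(y.as_mut_ptr(), maskz);
        _mm_storeu_pd(z.as_mut_ptr(), mask3);
    }
    (x, y, z)
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        // Calling a safe `#[target_feature]` fn from a non-feature context still needs `unsafe`.
        let (mask, maskz, mask3) = unsafe { masked_fnmadd_demo() };
        assert_eq!(mask, [1.0, 1.0]);
        assert_eq!(maskz, [0.0, 1.0]);
        assert_eq!(mask3, [9.0, 1.0]);
    }
}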
@@ -4266,8 +4705,8 @@ pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { - simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) +pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4277,8 +4716,8 @@ pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) +pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4288,8 +4727,8 @@ pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { - simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) +pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4299,8 +4738,8 @@ pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { - simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) +pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -4310,8 +4749,8 @@ pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) +pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4321,8 +4760,8 @@ pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) +pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4332,8 +4771,8 @@ pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps -pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) +pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst. @@ -4343,8 +4782,8 @@ pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -4354,8 +4793,8 @@ pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) +pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -4365,8 +4804,8 @@ pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { - simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) +pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4376,8 +4815,8 @@ pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { - simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) +pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4387,8 +4826,8 @@ pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mm #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) +pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4398,8 +4837,8 @@ pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) +pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4409,8 +4848,8 @@ pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { - simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) +pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -4420,8 +4859,8 @@ pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mm #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) +pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -4431,8 +4870,8 @@ pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) +pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set). @@ -4442,8 +4881,8 @@ pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd -pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) +pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4453,8 +4892,8 @@ pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 { - transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) +pub fn _mm512_rcp14_ps(a: __m512) -> __m512 { + unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4464,8 +4903,8 @@ pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) +pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
@@ -4475,8 +4914,8 @@ pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) +pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4486,8 +4925,8 @@ pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm256_rcp14_ps(a: __m256) -> __m256 { - transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) +pub fn _mm256_rcp14_ps(a: __m256) -> __m256 { + unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4497,8 +4936,8 @@ pub unsafe fn _mm256_rcp14_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) +pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4508,8 +4947,8 @@ pub unsafe fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) +pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. 
@@ -4519,8 +4958,8 @@ pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm_rcp14_ps(a: __m128) -> __m128 { - transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) +pub fn _mm_rcp14_ps(a: __m128) -> __m128 { + unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4530,8 +4969,8 @@ pub unsafe fn _mm_rcp14_ps(a: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) } } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4541,8 +4980,8 @@ pub unsafe fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] -pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4552,8 +4991,8 @@ pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d { - transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) +pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d { + unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
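As a quick illustration of the `rcp14` family (approximate reciprocal with relative error below 2^-14, plus the usual `src`/writemask merging), here is a hypothetical sketch under the same assumptions as the example above; `rcp14_demo` is an invented name:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn rcp14_demo() -> [f32; 4] {
    let a = _mm_set_ps(8.0, 4.0, 2.0, 1.0); // lanes (0..3) = 1, 2, 4, 8
    let src = _mm_set1_ps(-1.0);
    // k = 0b0110: lanes 1 and 2 receive ~1/a, lanes 0 and 3 are copied from `src`.
    let r = _mm_mask_rcp14_ps(src, 0b0110, a);
    let mut out = [0.0f32; 4];
    unsafe { _mm_storeu_ps(out.as_mut_ptr(), r) }; // raw-pointer store stays `unsafe`
    out
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        let out = unsafe { rcp14_demo() };
        assert_eq!(out[0], -1.0); // masked-out lanes come from `src`
        assert_eq!(out[3], -1.0);
        // Active lanes: documented relative error bound of 2^-14.
        assert!((out[1] - 0.5).abs() <= 0.5 / 16384.0);
        assert!((out[2] - 0.25).abs() <= 0.25 / 16384.0);
    }
}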
@@ -4563,8 +5002,8 @@ pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) +pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4574,8 +5013,8 @@ pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) +pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4585,8 +5024,8 @@ pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm256_rcp14_pd(a: __m256d) -> __m256d { - transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) +pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d { + unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4596,8 +5035,8 @@ pub unsafe fn _mm256_rcp14_pd(a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) +pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
@@ -4607,8 +5046,8 @@ pub unsafe fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) +pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4618,8 +5057,8 @@ pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm_rcp14_pd(a: __m128d) -> __m128d { - transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) +pub fn _mm_rcp14_pd(a: __m128d) -> __m128d { + unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4629,8 +5068,8 @@ pub unsafe fn _mm_rcp14_pd(a: __m128d) -> __m128d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) } } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4640,8 +5079,8 @@ pub unsafe fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] -pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. 
@@ -4651,8 +5090,8 @@ pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 { - transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) +pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 { + unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4662,8 +5101,8 @@ pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) +pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4673,8 +5112,8 @@ pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) +pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4684,8 +5123,8 @@ pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm256_rsqrt14_ps(a: __m256) -> __m256 { - transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) +pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 { + unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
@@ -4695,8 +5134,8 @@ pub unsafe fn _mm256_rsqrt14_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) +pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4706,8 +5145,8 @@ pub unsafe fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) +pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4717,8 +5156,8 @@ pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm_rsqrt14_ps(a: __m128) -> __m128 { - transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) +pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4728,8 +5167,8 @@ pub unsafe fn _mm_rsqrt14_ps(a: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) } } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
@@ -4739,8 +5178,8 @@ pub unsafe fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] -pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4750,8 +5189,8 @@ pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d { - transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) +pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d { + unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4761,8 +5200,8 @@ pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) +pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4772,8 +5211,8 @@ pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) +pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. 
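The `rsqrt14` intrinsics carry the same 2^-14 relative-error bound; the hypothetical sketch below (same assumptions as above, `rsqrt14_demo` is an invented name) checks that bound against a scalar `1.0 / x.sqrt()`:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn rsqrt14_demo() -> [f32; 4] {
    let a = _mm_set_ps(10.0, 9.0, 2.0, 0.25); // lanes (0..3) = 0.25, 2, 9, 10
    let r = _mm_rsqrt14_ps(a);
    let mut out = [0.0f32; 4];
    unsafe { _mm_storeu_ps(out.as_mut_ptr(), r) }; // raw-pointer store stays `unsafe`
    out
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        let approx = unsafe { rsqrt14_demo() };
        let inputs = [0.25f32, 2.0, 9.0, 10.0];
        for i in 0..4 {
            let exact = 1.0 / inputs[i].sqrt();
            // Documented bound: maximum relative error below 2^-14.
            assert!((approx[i] - exact).abs() <= exact / 16384.0);
        }
    }
}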
@@ -4783,8 +5222,8 @@ pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d { - transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) +pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d { + unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4794,8 +5233,8 @@ pub unsafe fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) +pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4805,8 +5244,8 @@ pub unsafe fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) +pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4816,8 +5255,8 @@ pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm_rsqrt14_pd(a: __m128d) -> __m128d { - transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) +pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
@@ -4827,8 +5266,8 @@ pub unsafe fn _mm_rsqrt14_pd(a: __m128d) -> __m128d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) } } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4838,8 +5277,8 @@ pub unsafe fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] -pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4849,14 +5288,16 @@ pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 { - transmute(vgetexpps( - a.as_f32x16(), - f32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - )) -} +pub fn _mm512_getexp_ps(a: __m512) -> __m512 { + unsafe { + transmute(vgetexpps( + a.as_f32x16(), + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } +} /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. /// @@ -4865,13 +5306,15 @@ pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - transmute(vgetexpps( - a.as_f32x16(), - src.as_f32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + transmute(vgetexpps( + a.as_f32x16(), + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4881,13 +5324,15 @@ pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vgetexpps( - a.as_f32x16(), - f32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + transmute(vgetexpps( + a.as_f32x16(), + f32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4897,8 +5342,8 @@ pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm256_getexp_ps(a: __m256) -> __m256 { - transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) +pub fn _mm256_getexp_ps(a: __m256) -> __m256 { + unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4908,8 +5353,8 @@ pub unsafe fn _mm256_getexp_ps(a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) +pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4919,8 +5364,8 @@ pub unsafe fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) +pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. 
@@ -4930,8 +5375,8 @@ pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm_getexp_ps(a: __m128) -> __m128 { - transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) +pub fn _mm_getexp_ps(a: __m128) -> __m128 { + unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4941,8 +5386,8 @@ pub unsafe fn _mm_getexp_ps(a: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4952,8 +5397,8 @@ pub unsafe fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] -pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4963,13 +5408,15 @@ pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d { - transmute(vgetexppd( - a.as_f64x8(), - f64x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_getexp_pd(a: __m512d) -> __m512d { + unsafe { + transmute(vgetexppd( + a.as_f64x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. 
@@ -4979,13 +5426,15 @@ pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - transmute(vgetexppd( - a.as_f64x8(), - src.as_f64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + transmute(vgetexppd( + a.as_f64x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -4995,13 +5444,15 @@ pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vgetexppd( - a.as_f64x8(), - f64x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + transmute(vgetexppd( + a.as_f64x8(), + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5011,8 +5462,8 @@ pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm256_getexp_pd(a: __m256d) -> __m256d { - transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) +pub fn _mm256_getexp_pd(a: __m256d) -> __m256d { + unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5022,8 +5473,8 @@ pub unsafe fn _mm256_getexp_pd(a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) +pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5033,8 +5484,8 @@ pub unsafe fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) +pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5044,8 +5495,8 @@ pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm_getexp_pd(a: __m128d) -> __m128d { - transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) +pub fn _mm_getexp_pd(a: __m128d) -> __m128d { + unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5055,8 +5506,8 @@ pub unsafe fn _mm_getexp_pd(a: __m128d) -> __m128d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. 
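A minimal, hypothetical sketch of the getexp family above (not part of the patch): getexp essentially returns floor(log2(x)) as a float per lane, and the writemask variant keeps src lanes whose mask bit is clear. It assumes an x86_64 nightly toolchain with #![feature(stdarch_x86_avx512)], a CPU with AVX-512F/VL, and a std::arch where this change has landed, so the value-only intrinsics are safe inside a matching #[target_feature] function; only the pointer-based stores still need unsafe.

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn getexp_demo() -> ([f32; 4], [f32; 4]) {
    let a = _mm_set_ps(0.5, 7.0, 8.0, 1.0); // lanes (low to high): 1.0, 8.0, 7.0, 0.5
    let src = _mm_set1_ps(-1.0);

    let e = _mm_getexp_ps(a); // expected exponents: 0.0, 3.0, 2.0, -1.0
    let em = _mm_mask_getexp_ps(src, 0b0101, a); // lanes 1 and 3 copied from src

    let (mut full, mut masked) = ([0.0f32; 4], [0.0f32; 4]);
    unsafe {
        // Raw-pointer stores remain unsafe even after this change.
        _mm_storeu_ps(full.as_mut_ptr(), e);
        _mm_storeu_ps(masked.as_mut_ptr(), em);
    }
    (full, masked)
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        // The runtime check above is what makes this call sound.
        println!("{:?}", unsafe { getexp_demo() });
    }
}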
@@ -5066,8 +5517,8 @@ pub unsafe fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] -pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5084,17 +5535,19 @@ pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_roundscale_ps(a: __m512) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x16(); - let r = vrndscaleps( - a, - IMM8, - f32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) +pub fn _mm512_roundscale_ps(a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let r = vrndscaleps( + a, + IMM8, + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -5111,16 +5564,14 @@ pub unsafe fn _mm512_roundscale_ps(a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_roundscale_ps( - src: __m512, - k: __mmask16, - a: __m512, -) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x16(); - let src = src.as_f32x16(); - let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -5137,11 +5588,13 @@ pub unsafe fn _mm512_mask_roundscale_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x16(); - let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5158,11 +5611,13 @@ pub 
unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_roundscale_ps(a: __m256) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111); - transmute(r) +pub fn _mm256_roundscale_ps(a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -5179,16 +5634,14 @@ pub unsafe fn _mm256_roundscale_ps(a: __m256) -> __m256 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_roundscale_ps( - src: __m256, - k: __mmask8, - a: __m256, -) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let src = src.as_f32x8(); - let r = vrndscaleps256(a, IMM8, src, k); - transmute(r) +pub fn _mm256_mask_roundscale_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let src = src.as_f32x8(); + let r = vrndscaleps256(a, IMM8, src, k); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -5205,11 +5658,13 @@ pub unsafe fn _mm256_mask_roundscale_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k); - transmute(r) +pub fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5226,11 +5681,13 @@ pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_roundscale_ps(a: __m128) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111); - transmute(r) +pub fn _mm_roundscale_ps(a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -5247,16 +5704,14 @@ pub unsafe fn _mm_roundscale_ps(a: 
__m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_roundscale_ps( - src: __m128, - k: __mmask8, - a: __m128, -) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let src = src.as_f32x4(); - let r = vrndscaleps128(a, IMM8, src, k); - transmute(r) +pub fn _mm_mask_roundscale_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let src = src.as_f32x4(); + let r = vrndscaleps128(a, IMM8, src, k); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -5273,11 +5728,13 @@ pub unsafe fn _mm_mask_roundscale_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k); - transmute(r) +pub fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5294,11 +5751,13 @@ pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_roundscale_pd(a: __m512d) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x8(); - let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm512_roundscale_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -5315,16 +5774,18 @@ pub unsafe fn _mm512_roundscale_pd(a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_roundscale_pd( +pub fn _mm512_mask_roundscale_pd( src: __m512d, k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x8(); - let src = src.as_f64x8(); - let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding 
mask bit is not set).\ @@ -5341,11 +5802,13 @@ pub unsafe fn _mm512_mask_roundscale_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x8(); - let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5362,11 +5825,13 @@ pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_roundscale_pd(a: __m256d) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x4(); - let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111); - transmute(r) +pub fn _mm256_roundscale_pd(a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -5383,16 +5848,18 @@ pub unsafe fn _mm256_roundscale_pd(a: __m256d) -> __m256d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_roundscale_pd( +pub fn _mm256_mask_roundscale_pd( src: __m256d, k: __mmask8, a: __m256d, ) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x4(); - let src = src.as_f64x4(); - let r = vrndscalepd256(a, IMM8, src, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let src = src.as_f64x4(); + let r = vrndscalepd256(a, IMM8, src, k); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -5409,11 +5876,13 @@ pub unsafe fn _mm256_mask_roundscale_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x4(); - let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k); - transmute(r) +pub fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5430,11 +5899,13 @@ pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256 
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_roundscale_pd(a: __m128d) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011); - transmute(r) +pub fn _mm_roundscale_pd(a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -5451,16 +5922,14 @@ pub unsafe fn _mm_roundscale_pd(a: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_roundscale_pd( - src: __m128d, - k: __mmask8, - a: __m128d, -) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let src = src.as_f64x2(); - let r = vrndscalepd128(a, IMM8, src, k); - transmute(r) +pub fn _mm_mask_roundscale_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let src = src.as_f64x2(); + let r = vrndscalepd128(a, IMM8, src, k); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -5477,11 +5946,13 @@ pub unsafe fn _mm_mask_roundscale_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k); - transmute(r) +pub fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5491,14 +5962,16 @@ pub unsafe fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 { - transmute(vscalefps( - a.as_f32x16(), - b.as_f32x16(), - f32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vscalefps( + a.as_f32x16(), + b.as_f32x16(), + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
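The roundscale hunks above round each lane to a fixed number of fraction bits chosen by IMM8, with the rounding direction taken from the usual _MM_FROUND constants; with zero fraction bits and _MM_FROUND_TO_NEG_INF it acts like a per-lane floor. A hypothetical sketch under the same nightly/AVX-512 assumptions as the getexp example:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn roundscale_demo() -> [f32; 4] {
    let a = _mm_set_ps(-1.25, 2.5, 3.75, 0.1); // lanes: 0.1, 3.75, 2.5, -1.25
    // Zero fraction bits plus round-toward-negative-infinity: a per-lane floor.
    let r = _mm_roundscale_ps::<{ _MM_FROUND_TO_NEG_INF }>(a); // expected: 0.0, 3.0, 2.0, -2.0
    let mut out = [0.0f32; 4];
    unsafe { _mm_storeu_ps(out.as_mut_ptr(), r) }; // pointer store stays unsafe
    out
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        println!("{:?}", unsafe { roundscale_demo() });
    }
}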
@@ -5508,14 +5981,16 @@ pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - transmute(vscalefps( - a.as_f32x16(), - b.as_f32x16(), - src.as_f32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vscalefps( + a.as_f32x16(), + b.as_f32x16(), + src.as_f32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5525,14 +6000,16 @@ pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - transmute(vscalefps( - a.as_f32x16(), - b.as_f32x16(), - f32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + transmute(vscalefps( + a.as_f32x16(), + b.as_f32x16(), + f32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5542,13 +6019,15 @@ pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m5 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 { - transmute(vscalefps256( - a.as_f32x8(), - b.as_f32x8(), - f32x8::ZERO, - 0b11111111, - )) +pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 { + unsafe { + transmute(vscalefps256( + a.as_f32x8(), + b.as_f32x8(), + f32x8::ZERO, + 0b11111111, + )) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5558,8 +6037,8 @@ pub unsafe fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) +pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -5569,8 +6048,8 @@ pub unsafe fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) +pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5580,13 +6059,15 @@ pub unsafe fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 { - transmute(vscalefps128( - a.as_f32x4(), - b.as_f32x4(), - f32x4::ZERO, - 0b00001111, - )) +pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vscalefps128( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + 0b00001111, + )) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5596,8 +6077,8 @@ pub unsafe fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5607,8 +6088,8 @@ pub unsafe fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] -pub unsafe fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. 
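The scalef intrinsics above compute roughly a * 2^floor(b) per lane, a vectorised ldexp/scalbn with a floating-point exponent argument. A hypothetical sketch, same nightly/AVX-512 assumptions:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn scalef_demo() -> [f32; 8] {
    let a = _mm256_set1_ps(3.0);
    let b = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, -1.0, -2.0, 2.5, -0.5);
    // dst = a * 2^floor(b); 2.5 floors to 2 and -0.5 floors to -1.
    let r = _mm256_scalef_ps(a, b); // expected: 3, 6, 12, 24, 1.5, 0.75, 12, 1.5
    let mut out = [0.0f32; 8];
    unsafe { _mm256_storeu_ps(out.as_mut_ptr(), r) };
    out
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        println!("{:?}", unsafe { scalef_demo() });
    }
}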
@@ -5618,14 +6099,16 @@ pub unsafe fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d { - transmute(vscalefpd( - a.as_f64x8(), - b.as_f64x8(), - f64x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + transmute(vscalefpd( + a.as_f64x8(), + b.as_f64x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5635,14 +6118,16 @@ pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - transmute(vscalefpd( - a.as_f64x8(), - b.as_f64x8(), - src.as_f64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + transmute(vscalefpd( + a.as_f64x8(), + b.as_f64x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5652,14 +6137,16 @@ pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - transmute(vscalefpd( - a.as_f64x8(), - b.as_f64x8(), - f64x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + transmute(vscalefpd( + a.as_f64x8(), + b.as_f64x8(), + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5669,13 +6156,15 @@ pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d { - transmute(vscalefpd256( - a.as_f64x4(), - b.as_f64x4(), - f64x4::ZERO, - 0b00001111, - )) +pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d { + unsafe { + transmute(vscalefpd256( + a.as_f64x4(), + b.as_f64x4(), + f64x4::ZERO, + 0b00001111, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -5685,8 +6174,8 @@ pub unsafe fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) +pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -5696,8 +6185,8 @@ pub unsafe fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) +pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5707,13 +6196,15 @@ pub unsafe fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefpd128( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - 0b00000011, - )) +pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefpd128( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + 0b00000011, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5723,8 +6214,8 @@ pub unsafe fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
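The mask/maskz pairs throughout this file differ only in what happens to lanes whose mask bit is clear: the writemask form copies them from src, the zeromask form zeroes them. A hypothetical sketch using the 128-bit scalef variants (same nightly/AVX-512 assumptions):

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn mask_vs_maskz() -> ([f64; 2], [f64; 2]) {
    let a = _mm_set_pd(8.0, 2.0); // lanes: 2.0, 8.0
    let b = _mm_set1_pd(3.0);     // scale by 2^3
    let src = _mm_set1_pd(-7.0);
    let k = 0b01;                 // only lane 0 selected

    let w = _mm_mask_scalef_pd(src, k, a, b); // 16.0, then -7.0 copied from src
    let z = _mm_maskz_scalef_pd(k, a, b);     // 16.0, then 0.0

    let (mut wm, mut zm) = ([0.0f64; 2], [0.0f64; 2]);
    unsafe {
        _mm_storeu_pd(wm.as_mut_ptr(), w);
        _mm_storeu_pd(zm.as_mut_ptr(), z);
    }
    (wm, zm)
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        println!("{:?}", unsafe { mask_vs_maskz() });
    }
}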
@@ -5734,8 +6225,8 @@ pub unsafe fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] -pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -5746,13 +6237,15 @@ pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let c = c.as_i32x16(); - let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. @@ -5763,18 +6256,20 @@ pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fixupimm_ps( +pub fn _mm512_mask_fixupimm_ps( a: __m512, k: __mmask16, b: __m512, c: __m512i, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let c = c.as_i32x16(); - let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
@@ -5785,18 +6280,20 @@ pub unsafe fn _mm512_mask_fixupimm_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fixupimm_ps( +pub fn _mm512_maskz_fixupimm_ps( k: __mmask16, a: __m512, b: __m512, c: __m512i, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let c = c.as_i32x16(); - let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -5807,13 +6304,15 @@ pub unsafe fn _mm512_maskz_fixupimm_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let c = c.as_i32x8(); - let r = vfixupimmps256(a, b, c, IMM8, 0b11111111); - transmute(r) +pub fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let c = c.as_i32x8(); + let r = vfixupimmps256(a, b, c, IMM8, 0b11111111); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. @@ -5824,18 +6323,20 @@ pub unsafe fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_fixupimm_ps( +pub fn _mm256_mask_fixupimm_ps( a: __m256, k: __mmask8, b: __m256, c: __m256i, ) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let c = c.as_i32x8(); - let r = vfixupimmps256(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let c = c.as_i32x8(); + let r = vfixupimmps256(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
@@ -5846,18 +6347,20 @@ pub unsafe fn _mm256_mask_fixupimm_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_maskz_fixupimm_ps( +pub fn _mm256_maskz_fixupimm_ps( k: __mmask8, a: __m256, b: __m256, c: __m256i, ) -> __m256 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let c = c.as_i32x8(); - let r = vfixupimmpsz256(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let c = c.as_i32x8(); + let r = vfixupimmpsz256(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -5868,13 +6371,15 @@ pub unsafe fn _mm256_maskz_fixupimm_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmps128(a, b, c, IMM8, 0b00001111); - transmute(r) +pub fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmps128(a, b, c, IMM8, 0b00001111); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. @@ -5885,18 +6390,20 @@ pub unsafe fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fixupimm_ps( +pub fn _mm_mask_fixupimm_ps( a: __m128, k: __mmask8, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmps128(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmps128(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
@@ -5907,18 +6414,20 @@ pub unsafe fn _mm_mask_fixupimm_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fixupimm_ps( +pub fn _mm_maskz_fixupimm_ps( k: __mmask8, a: __m128, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmpsz128(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmpsz128(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -5929,13 +6438,15 @@ pub unsafe fn _mm_maskz_fixupimm_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let c = c.as_i64x8(); - let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. @@ -5946,18 +6457,20 @@ pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fixupimm_pd( +pub fn _mm512_mask_fixupimm_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512i, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let c = c.as_i64x8(); - let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
@@ -5968,18 +6481,20 @@ pub unsafe fn _mm512_mask_fixupimm_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fixupimm_pd( +pub fn _mm512_maskz_fixupimm_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512i, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let c = c.as_i64x8(); - let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -5990,13 +6505,15 @@ pub unsafe fn _mm512_maskz_fixupimm_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let c = c.as_i64x4(); - let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111); - transmute(r) +pub fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let c = c.as_i64x4(); + let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. @@ -6007,18 +6524,20 @@ pub unsafe fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_fixupimm_pd( +pub fn _mm256_mask_fixupimm_pd( a: __m256d, k: __mmask8, b: __m256d, c: __m256i, ) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let c = c.as_i64x4(); - let r = vfixupimmpd256(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let c = c.as_i64x4(); + let r = vfixupimmpd256(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
@@ -6029,18 +6548,20 @@ pub unsafe fn _mm256_mask_fixupimm_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_maskz_fixupimm_pd( +pub fn _mm256_maskz_fixupimm_pd( k: __mmask8, a: __m256d, b: __m256d, c: __m256i, ) -> __m256d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let c = c.as_i64x4(); - let r = vfixupimmpdz256(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let c = c.as_i64x4(); + let r = vfixupimmpdz256(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -6051,13 +6572,15 @@ pub unsafe fn _mm256_maskz_fixupimm_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011); - transmute(r) +pub fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. @@ -6068,18 +6591,20 @@ pub unsafe fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fixupimm_pd( +pub fn _mm_mask_fixupimm_pd( a: __m128d, k: __mmask8, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let r = vfixupimmpd128(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmpd128(a, b, c, IMM8, k); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting. 
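The fixupimm family is the least self-explanatory: c carries a per-element table of 4-bit fixup tokens keyed off how the matching lane of b classifies, and imm8 selects which flags get reported; the token meanings live in Intel's SDM and are not restated here. Purely to show the call shape now that the wrapper is a safe fn, a hypothetical sketch with a placeholder all-zero table (same nightly/AVX-512 assumptions):

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn fixupimm_shape(a: __m128d, b: __m128d) -> __m128d {
    // Placeholder token table; a real table encodes one fixup action per
    // classification of b (see Intel's SDM for the nibble meanings).
    let table = _mm_set1_epi64x(0);
    // IMM8 = 0: no additional flag reporting requested.
    _mm_fixupimm_pd::<0>(a, b, table)
}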
@@ -6090,18 +6615,20 @@ pub unsafe fn _mm_mask_fixupimm_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fixupimm_pd( +pub fn _mm_maskz_fixupimm_pd( k: __mmask8, a: __m128d, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let r = vfixupimmpdz128(a, b, c, IMM8, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmpdz128(a, b, c, IMM8, k); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6112,17 +6639,15 @@ pub unsafe fn _mm_maskz_fixupimm_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_ternarylogic_epi32( - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let c = c.as_i32x16(); - let r = vpternlogd(a, b, c, IMM8); - transmute(r) +pub fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let c = c.as_i32x16(); + let r = vpternlogd(a, b, c, IMM8); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set). @@ -6133,18 +6658,20 @@ pub unsafe fn _mm512_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_ternarylogic_epi32( +pub fn _mm512_mask_ternarylogic_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let src = src.as_i32x16(); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let r = vpternlogd(src, a, b, IMM8); - transmute(simd_select_bitmask(k, r, src)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i32x16(); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let r = vpternlogd(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6155,18 +6682,20 @@ pub unsafe fn _mm512_mask_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_ternarylogic_epi32( +pub fn _mm512_maskz_ternarylogic_epi32( k: __mmask16, a: __m512i, b: __m512i, c: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let c = c.as_i32x16(); - let r = vpternlogd(a, b, c, IMM8); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let c = c.as_i32x16(); + let r = vpternlogd(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6177,17 +6706,15 @@ pub unsafe fn _mm512_maskz_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_ternarylogic_epi32( - a: __m256i, - b: __m256i, - c: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let c = c.as_i32x8(); - let r = vpternlogd256(a, b, c, IMM8); - transmute(r) +pub fn _mm256_ternarylogic_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let c = c.as_i32x8(); + let r = vpternlogd256(a, b, c, IMM8); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set). 
@@ -6198,18 +6725,20 @@ pub unsafe fn _mm256_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_ternarylogic_epi32( +pub fn _mm256_mask_ternarylogic_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let src = src.as_i32x8(); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let r = vpternlogd256(src, a, b, IMM8); - transmute(simd_select_bitmask(k, r, src)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i32x8(); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r = vpternlogd256(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6220,18 +6749,20 @@ pub unsafe fn _mm256_mask_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_maskz_ternarylogic_epi32( +pub fn _mm256_maskz_ternarylogic_epi32( k: __mmask8, a: __m256i, b: __m256i, c: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let c = c.as_i32x8(); - let r = vpternlogd256(a, b, c, IMM8); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let c = c.as_i32x8(); + let r = vpternlogd256(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6242,17 +6773,15 @@ pub unsafe fn _mm256_maskz_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_ternarylogic_epi32( - a: __m128i, - b: __m128i, - c: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let c = c.as_i32x4(); - let r = vpternlogd128(a, b, c, IMM8); - transmute(r) +pub fn _mm_ternarylogic_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let c = c.as_i32x4(); + let r = vpternlogd128(a, b, c, IMM8); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set). @@ -6263,18 +6792,20 @@ pub unsafe fn _mm_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_ternarylogic_epi32( +pub fn _mm_mask_ternarylogic_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let src = src.as_i32x4(); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let r = vpternlogd128(src, a, b, IMM8); - transmute(simd_select_bitmask(k, r, src)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i32x4(); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let r = vpternlogd128(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6285,18 +6816,20 @@ pub unsafe fn _mm_mask_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_ternarylogic_epi32( +pub fn _mm_maskz_ternarylogic_epi32( k: __mmask8, a: __m128i, b: __m128i, c: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let c = c.as_i32x4(); - let r = vpternlogd128(a, b, c, IMM8); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let c = c.as_i32x4(); + let r = vpternlogd128(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. 
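After this change the intrinsic itself is a safe fn; what remains unsafe is entering a context that asserts a target feature without proving it. A hedged usage sketch under those assumptions (nightly toolchain, since these intrinsics are still gated on the unstable stdarch_x86_avx512 feature; x86_64, where sse2 is a baseline feature):

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    /// Computes a ^ b ^ c in a single vpternlogd; 0x96 is the truth table for 3-way XOR.
    #[target_feature(enable = "avx512f,avx512vl")]
    fn xor3(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
        _mm_ternarylogic_epi32::<0x96>(a, b, c)
    }

    fn main() {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            // sse2 is a baseline x86_64 feature, so these set/extract calls are safe here.
            let (a, b, c) = (_mm_set1_epi32(1), _mm_set1_epi32(2), _mm_set1_epi32(4));
            // SAFETY: the required target features were detected at runtime just above.
            let r = unsafe { xor3(a, b, c) };
            assert_eq!(_mm_cvtsi128_si32(r), 7);
        }
    }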
@@ -6307,17 +6840,15 @@ pub unsafe fn _mm_maskz_ternarylogic_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_ternarylogic_epi64( - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let b = b.as_i64x8(); - let c = c.as_i64x8(); - let r = vpternlogq(a, b, c, IMM8); - transmute(r) +pub fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let c = c.as_i64x8(); + let r = vpternlogq(a, b, c, IMM8); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set). @@ -6328,18 +6859,20 @@ pub unsafe fn _mm512_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_ternarylogic_epi64( +pub fn _mm512_mask_ternarylogic_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let src = src.as_i64x8(); - let a = a.as_i64x8(); - let b = b.as_i64x8(); - let r = vpternlogq(src, a, b, IMM8); - transmute(simd_select_bitmask(k, r, src)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i64x8(); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r = vpternlogq(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6350,18 +6883,20 @@ pub unsafe fn _mm512_mask_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_ternarylogic_epi64( +pub fn _mm512_maskz_ternarylogic_epi64( k: __mmask8, a: __m512i, b: __m512i, c: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let b = b.as_i64x8(); - let c = c.as_i64x8(); - let r = vpternlogq(a, b, c, IMM8); - transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let c = c.as_i64x8(); + let r = vpternlogq(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6372,17 +6907,15 @@ pub unsafe fn _mm512_maskz_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_ternarylogic_epi64( - a: __m256i, - b: __m256i, - c: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let b = b.as_i64x4(); - let c = c.as_i64x4(); - let r = vpternlogq256(a, b, c, IMM8); - transmute(r) +pub fn _mm256_ternarylogic_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let c = c.as_i64x4(); + let r = vpternlogq256(a, b, c, IMM8); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set). @@ -6393,18 +6926,20 @@ pub unsafe fn _mm256_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_ternarylogic_epi64( +pub fn _mm256_mask_ternarylogic_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let src = src.as_i64x4(); - let a = a.as_i64x4(); - let b = b.as_i64x4(); - let r = vpternlogq256(src, a, b, IMM8); - transmute(simd_select_bitmask(k, r, src)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i64x4(); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let r = vpternlogq256(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). 
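The mask_ and maskz_ forms differ only in the fallback used for lanes whose mask bit is clear: the corresponding lane of src for the writemask variants, zero for the zeromask variants. A scalar sketch of that final select step (an illustration of the simd_select_bitmask call in these bodies, not patch code):

    /// Scalar model of the final bitmask select over 4 lanes of 64-bit data.
    fn select_bitmask_i64x4(k: u8, computed: [i64; 4], fallback: [i64; 4]) -> [i64; 4] {
        let mut dst = [0i64; 4];
        for lane in 0..4 {
            dst[lane] = if (k >> lane) & 1 != 0 { computed[lane] } else { fallback[lane] };
        }
        dst
    }
    // mask_*  variants: fallback = the lanes of `src`
    // maskz_* variants: fallback = all zeros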
@@ -6415,18 +6950,20 @@ pub unsafe fn _mm256_mask_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_maskz_ternarylogic_epi64( +pub fn _mm256_maskz_ternarylogic_epi64( k: __mmask8, a: __m256i, b: __m256i, c: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let b = b.as_i64x4(); - let c = c.as_i64x4(); - let r = vpternlogq256(a, b, c, IMM8); - transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let c = c.as_i64x4(); + let r = vpternlogq256(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6437,17 +6974,15 @@ pub unsafe fn _mm256_maskz_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_ternarylogic_epi64( - a: __m128i, - b: __m128i, - c: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let b = b.as_i64x2(); - let c = c.as_i64x2(); - let r = vpternlogq128(a, b, c, IMM8); - transmute(r) +pub fn _mm_ternarylogic_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let c = c.as_i64x2(); + let r = vpternlogq128(a, b, c, IMM8); + transmute(r) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set). @@ -6458,18 +6993,20 @@ pub unsafe fn _mm_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_ternarylogic_epi64( +pub fn _mm_mask_ternarylogic_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let src = src.as_i64x2(); - let a = a.as_i64x2(); - let b = b.as_i64x2(); - let r = vpternlogq128(src, a, b, IMM8); - transmute(simd_select_bitmask(k, r, src)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let src = src.as_i64x2(); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let r = vpternlogq128(src, a, b, IMM8); + transmute(simd_select_bitmask(k, r, src)) + } } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set). @@ -6480,18 +7017,20 @@ pub unsafe fn _mm_mask_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_ternarylogic_epi64( +pub fn _mm_maskz_ternarylogic_epi64( k: __mmask8, a: __m128i, b: __m128i, c: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let b = b.as_i64x2(); - let c = c.as_i64x2(); - let r = vpternlogq128(a, b, c, IMM8); - transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let c = c.as_i64x2(); + let r = vpternlogq128(a, b, c, IMM8); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. @@ -6511,24 +7050,23 @@ pub unsafe fn _mm_maskz_ternarylogic_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm512_getmant_ps< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm512_getmant_ps( a: __m512, ) -> __m512 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x16(); - let zero = f32x16::ZERO; - let r = vgetmantps( - a, - SIGN << 2 | NORM, - zero, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x16(); + let zero = f32x16::ZERO; + let r = vgetmantps( + a, + SIGN << 2 | NORM, + zero, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6548,7 +7086,7 @@ pub unsafe fn _mm512_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_mask_getmant_ps< +pub fn _mm512_mask_getmant_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -6556,12 +7094,14 @@ pub unsafe fn _mm512_mask_getmant_ps< k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x16(); - let src = src.as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6581,24 +7121,26 @@ pub unsafe fn _mm512_mask_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm512_maskz_getmant_ps< +pub fn _mm512_maskz_getmant_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x16(); - let r = vgetmantps( - a, - SIGN << 2 | NORM, - f32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x16(); + let r = vgetmantps( + a, + SIGN << 2 | NORM, + f32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. 
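Concretely, for the [1, 2) normalization interval with the sign taken from the source, getmant forces each element's exponent to zero while keeping sign and mantissa. A scalar sketch of that one case (illustration only; zero, NaN, infinity and denormal handling are omitted):

    /// Scalar model of getmant for NORM = interval [1, 2) and SIGN = source sign,
    /// valid for finite, normal inputs.
    fn getmant_1_2(x: f32) -> f32 {
        // Keep the sign and mantissa bits, force the biased exponent to 127 (2^0).
        f32::from_bits((x.to_bits() & 0x807f_ffff) | 0x3f80_0000)
    }
    // getmant_1_2(6.0) == 1.5 and getmant_1_2(-0.375) == -1.5,
    // since 6.0 = 1.5 * 2^2 and -0.375 = -1.5 * 2^-2.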
@@ -6618,17 +7160,16 @@ pub unsafe fn _mm512_maskz_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm256_getmant_ps< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm256_getmant_ps( a: __m256, ) -> __m256 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x8(); - let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x8(); + let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6648,7 +7189,7 @@ pub unsafe fn _mm256_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm256_mask_getmant_ps< +pub fn _mm256_mask_getmant_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -6656,12 +7197,14 @@ pub unsafe fn _mm256_mask_getmant_ps< k: __mmask8, a: __m256, ) -> __m256 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x8(); - let src = src.as_f32x8(); - let r = vgetmantps256(a, SIGN << 2 | NORM, src, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x8(); + let src = src.as_f32x8(); + let r = vgetmantps256(a, SIGN << 2 | NORM, src, k); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6681,18 +7224,20 @@ pub unsafe fn _mm256_mask_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm256_maskz_getmant_ps< +pub fn _mm256_maskz_getmant_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( k: __mmask8, a: __m256, ) -> __m256 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x8(); - let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x8(); + let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. 
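The const parameters are supplied exactly as before; only the unsafe boundary moves to the caller. A hedged sketch (nightly, unstable stdarch_x86_avx512 gate; it assumes the _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_SRC constants defined elsewhere in this module):

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    /// Normalizes every lane of `a` into [1, 2), keeping each lane's original sign.
    /// Call it like the xor3 sketch further up: from another avx512f context, or
    /// through an `unsafe` block after runtime feature detection.
    #[target_feature(enable = "avx512f")]
    fn mantissas(a: __m512) -> __m512 {
        // Constant names assumed from this module's NORM/SIGN enums.
        _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a)
    }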
@@ -6712,17 +7257,16 @@ pub unsafe fn _mm256_maskz_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm_getmant_ps< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm_getmant_ps( a: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x4(); - let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6742,7 +7286,7 @@ pub unsafe fn _mm_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_mask_getmant_ps< +pub fn _mm_mask_getmant_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -6750,12 +7294,14 @@ pub unsafe fn _mm_mask_getmant_ps< k: __mmask8, a: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x4(); - let src = src.as_f32x4(); - let r = vgetmantps128(a, SIGN << 2 | NORM, src, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetmantps128(a, SIGN << 2 | NORM, src, k); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6775,18 +7321,20 @@ pub unsafe fn _mm_mask_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_maskz_getmant_ps< +pub fn _mm_maskz_getmant_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( k: __mmask8, a: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x4(); - let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6806,24 +7354,23 @@ pub unsafe fn _mm_maskz_getmant_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm512_getmant_pd< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm512_getmant_pd( a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x8(); - let zero = f64x8::ZERO; - let r = vgetmantpd( - a, - SIGN << 2 | NORM, - zero, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x8(); + let zero = f64x8::ZERO; + let r = vgetmantpd( + a, + SIGN << 2 | NORM, + zero, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6843,7 +7390,7 @@ pub unsafe fn _mm512_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_mask_getmant_pd< +pub fn _mm512_mask_getmant_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -6851,12 +7398,14 @@ pub unsafe fn _mm512_mask_getmant_pd< k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x8(); - let src = src.as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6876,24 +7425,26 @@ pub unsafe fn _mm512_mask_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm512_maskz_getmant_pd< +pub fn _mm512_maskz_getmant_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x8(); - let r = vgetmantpd( - a, - SIGN << 2 | NORM, - f64x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x8(); + let r = vgetmantpd( + a, + SIGN << 2 | NORM, + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6913,17 +7464,16 @@ pub unsafe fn _mm512_maskz_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm256_getmant_pd< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm256_getmant_pd( a: __m256d, ) -> __m256d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x4(); - let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x4(); + let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6943,7 +7493,7 @@ pub unsafe fn _mm256_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm256_mask_getmant_pd< +pub fn _mm256_mask_getmant_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -6951,12 +7501,14 @@ pub unsafe fn _mm256_mask_getmant_pd< k: __mmask8, a: __m256d, ) -> __m256d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x4(); - let src = src.as_f64x4(); - let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x4(); + let src = src.as_f64x4(); + let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -6976,18 +7528,20 @@ pub unsafe fn _mm256_mask_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm256_maskz_getmant_pd< +pub fn _mm256_maskz_getmant_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( k: __mmask8, a: __m256d, ) -> __m256d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x4(); - let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x4(); + let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -7007,17 +7561,16 @@ pub unsafe fn _mm256_maskz_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm_getmant_pd< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm_getmant_pd( a: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x2(); - let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -7037,7 +7590,7 @@ pub unsafe fn _mm_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_mask_getmant_pd< +pub fn _mm_mask_getmant_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -7045,12 +7598,14 @@ pub unsafe fn _mm_mask_getmant_pd< k: __mmask8, a: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x2(); - let src = src.as_f64x2(); - let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -7070,18 +7625,20 @@ pub unsafe fn _mm_mask_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_maskz_getmant_pd< +pub fn _mm_maskz_getmant_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( k: __mmask8, a: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x2(); - let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k); + transmute(r) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7099,12 +7656,14 @@ pub unsafe fn _mm_maskz_getmant_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vaddps(a, b, ROUNDING); - transmute(r) +pub fn _mm512_add_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vaddps(a, b, ROUNDING); + transmute(r) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7122,17 +7681,19 @@ pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_add_round_ps( +pub fn _mm512_mask_add_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b 
= b.as_f32x16(); - let r = vaddps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vaddps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7150,16 +7711,18 @@ pub unsafe fn _mm512_mask_add_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_add_round_ps( +pub fn _mm512_maskz_add_round_ps( k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vaddps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vaddps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7177,12 +7740,14 @@ pub unsafe fn _mm512_maskz_add_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vaddpd(a, b, ROUNDING); - transmute(r) +pub fn _mm512_add_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vaddpd(a, b, ROUNDING); + transmute(r) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7200,17 +7765,19 @@ pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_add_round_pd( +pub fn _mm512_mask_add_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vaddpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vaddpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7228,16 +7795,18 @@ pub unsafe fn _mm512_mask_add_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_add_round_pd( +pub fn _mm512_maskz_add_round_pd( k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vaddpd(a, b, ROUNDING); - 
transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vaddpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ @@ -7255,12 +7824,14 @@ pub unsafe fn _mm512_maskz_add_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vsubps(a, b, ROUNDING); - transmute(r) +pub fn _mm512_sub_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vsubps(a, b, ROUNDING); + transmute(r) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7278,17 +7849,19 @@ pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_sub_round_ps( +pub fn _mm512_mask_sub_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vsubps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vsubps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7306,16 +7879,18 @@ pub unsafe fn _mm512_mask_sub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_sub_round_ps( +pub fn _mm512_maskz_sub_round_ps( k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vsubps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vsubps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\ @@ -7333,12 +7908,14 @@ pub unsafe fn _mm512_maskz_sub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = 
b.as_f64x8(); - let r = vsubpd(a, b, ROUNDING); - transmute(r) +pub fn _mm512_sub_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vsubpd(a, b, ROUNDING); + transmute(r) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7356,17 +7933,19 @@ pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_sub_round_pd( +pub fn _mm512_mask_sub_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vsubpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vsubpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7384,16 +7963,18 @@ pub unsafe fn _mm512_mask_sub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_sub_round_pd( +pub fn _mm512_maskz_sub_round_pd( k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vsubpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vsubpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7411,12 +7992,14 @@ pub unsafe fn _mm512_maskz_sub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vmulps(a, b, ROUNDING); - transmute(r) +pub fn _mm512_mul_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmulps(a, b, ROUNDING); + transmute(r) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7434,17 +8017,19 @@ pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_mul_round_ps( +pub fn _mm512_mask_mul_round_ps( src: __m512, k: __mmask16, a: __m512, b: 
__m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vmulps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmulps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7462,16 +8047,18 @@ pub unsafe fn _mm512_mask_mul_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_mul_round_ps( +pub fn _mm512_maskz_mul_round_ps( k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vmulps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmulps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7489,12 +8076,14 @@ pub unsafe fn _mm512_maskz_mul_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vmulpd(a, b, ROUNDING); - transmute(r) +pub fn _mm512_mul_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmulpd(a, b, ROUNDING); + transmute(r) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7512,17 +8101,19 @@ pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_mul_round_pd( +pub fn _mm512_mask_mul_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vmulpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmulpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7540,16 +8131,18 @@ pub unsafe fn _mm512_mask_mul_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_mul_round_pd( +pub fn _mm512_maskz_mul_round_pd( k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - 
static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vmulpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmulpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\ @@ -7567,12 +8160,14 @@ pub unsafe fn _mm512_maskz_mul_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vdivps(a, b, ROUNDING); - transmute(r) +pub fn _mm512_div_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vdivps(a, b, ROUNDING); + transmute(r) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7590,17 +8185,19 @@ pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_div_round_ps( +pub fn _mm512_mask_div_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vdivps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vdivps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7618,16 +8215,18 @@ pub unsafe fn _mm512_mask_div_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_div_round_ps( +pub fn _mm512_maskz_div_round_ps( k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vdivps(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vdivps(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, =and store the results in dst.\ @@ -7645,12 +8244,14 @@ pub unsafe fn _mm512_maskz_div_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vdivpd(a, b, ROUNDING); - 
transmute(r) +pub fn _mm512_div_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vdivpd(a, b, ROUNDING); + transmute(r) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7668,17 +8269,19 @@ pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_div_round_pd( +pub fn _mm512_mask_div_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vdivpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vdivpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7696,16 +8299,18 @@ pub unsafe fn _mm512_mask_div_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_div_round_pd( +pub fn _mm512_maskz_div_round_pd( k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vdivpd(a, b, ROUNDING); - transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vdivpd(a, b, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ @@ -7723,11 +8328,13 @@ pub unsafe fn _mm512_maskz_div_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_sqrt_round_ps(a: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vsqrtps(a, ROUNDING); - transmute(r) +pub fn _mm512_sqrt_round_ps(a: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vsqrtps(a, ROUNDING); + transmute(r) + } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7745,15 +8352,17 @@ pub unsafe fn _mm512_sqrt_round_ps(a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_sqrt_round_ps( +pub fn _mm512_mask_sqrt_round_ps( src: __m512, k: __mmask16, a: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vsqrtps(a, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + 
static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vsqrtps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7771,11 +8380,13 @@ pub unsafe fn _mm512_mask_sqrt_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vsqrtps(a, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) +pub fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vsqrtps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\ @@ -7793,11 +8404,13 @@ pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_sqrt_round_pd(a: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vsqrtpd(a, ROUNDING); - transmute(r) +pub fn _mm512_sqrt_round_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vsqrtpd(a, ROUNDING); + transmute(r) + } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -7815,15 +8428,17 @@ pub unsafe fn _mm512_sqrt_round_pd(a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_sqrt_round_pd( +pub fn _mm512_mask_sqrt_round_pd( src: __m512d, k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vsqrtpd(a, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vsqrtpd(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7841,11 +8456,13 @@ pub unsafe fn _mm512_mask_sqrt_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vsqrtpd(a, ROUNDING); - transmute(simd_select_bitmask(k, r, f64x8::ZERO)) +pub fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vsqrtpd(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Multiply packed 
single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\ @@ -7863,13 +8480,11 @@ pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmadd_round_ps( - a: __m512, - b: __m512, - c: __m512, -) -> __m512 { - static_assert_rounding!(ROUNDING); - vfmadd132psround(a, b, c, ROUNDING) +pub fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132psround(a, b, c, ROUNDING) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -7887,14 +8502,16 @@ pub unsafe fn _mm512_fmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmadd_round_ps( +pub fn _mm512_mask_fmadd_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in a using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -7912,14 +8529,16 @@ pub unsafe fn _mm512_mask_fmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmadd_round_ps( +pub fn _mm512_maskz_fmadd_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps()) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -7937,14 +8556,16 @@ pub unsafe fn _mm512_maskz_fmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmadd_round_ps( +pub fn _mm512_mask3_fmadd_round_ps( a: __m512, b: __m512, c: __m512, k: __mmask16, ) -> __m512 { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, 
and store the results in dst.\ @@ -7962,13 +8583,11 @@ pub unsafe fn _mm512_mask3_fmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmadd_round_pd( - a: __m512d, - b: __m512d, - c: __m512d, -) -> __m512d { - static_assert_rounding!(ROUNDING); - vfmadd132pdround(a, b, c, ROUNDING) +pub fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(a, b, c, ROUNDING) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -7986,14 +8605,16 @@ pub unsafe fn _mm512_fmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmadd_round_pd( +pub fn _mm512_mask_fmadd_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8011,14 +8632,16 @@ pub unsafe fn _mm512_mask_fmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmadd_round_pd( +pub fn _mm512_maskz_fmadd_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd()) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd()) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8036,14 +8659,16 @@ pub unsafe fn _mm512_maskz_fmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmadd_round_pd( +pub fn _mm512_mask3_fmadd_round_pd( a: __m512d, b: __m512d, c: __m512d, k: __mmask8, ) -> __m512d { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\ @@ -8061,13 +8686,11 @@ pub unsafe fn _mm512_mask3_fmadd_round_pd( 
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmsub_round_ps( - a: __m512, - b: __m512, - c: __m512, -) -> __m512 { - static_assert_rounding!(ROUNDING); - vfmadd132psround(a, b, simd_neg(c), ROUNDING) +pub fn _mm512_fmsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132psround(a, b, simd_neg(c), ROUNDING) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8085,15 +8708,17 @@ pub unsafe fn _mm512_fmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmsub_round_ps( +pub fn _mm512_mask_fmsub_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8111,15 +8736,17 @@ pub unsafe fn _mm512_mask_fmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmsub_round_ps( +pub fn _mm512_maskz_fmsub_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_ps()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_ps()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8137,15 +8764,17 @@ pub unsafe fn _mm512_maskz_fmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmsub_round_ps( +pub fn _mm512_mask3_fmsub_round_ps( a: __m512, b: __m512, c: __m512, k: __mmask16, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } } 
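[Editor's note: usage sketch, not part of the patch.] With the conversions above, the `*_round_*` fused multiply-add intrinsics become safe `fn`s that can be called without an `unsafe` block from code that already enables the required target feature (`avx512f`), while the rounding mode is still chosen through the const generic parameter (also accepted as a trailing immediate via `#[rustc_legacy_const_generics]`). The sketch below illustrates the calling pattern under a few assumptions: `fused_ops` and `fmsub_if_supported` are hypothetical names invented for this example, the `#![feature(stdarch_x86_avx512)]` gate is assumed to be needed while these intrinsics are still `#[unstable]`, and the toolchain is assumed to support safe `#[target_feature]` functions, so the only remaining `unsafe` is the runtime-feature-gated call into the helper.

#![feature(stdarch_x86_avx512)] // assumption: needed while these intrinsics are #[unstable]; drop on toolchains where they are stable

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// Hypothetical helper: inside a #[target_feature(enable = "avx512f")] fn,
// the now-safe rounding intrinsics need no explicit `unsafe` block.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn fused_ops(a: __m512, b: __m512, c: __m512) -> __m512 {
    // Round to nearest and suppress exceptions: the same value (8) the
    // assert_instr attributes in this file use for ROUNDING.
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // t = a * b - c with explicit rounding.
    let t = _mm512_fmsub_round_ps::<R>(a, b, c);
    // t * b + c in the even lanes; odd lanes are zeroed by the mask.
    _mm512_maskz_fmadd_round_ps::<R>(0b0101_0101_0101_0101, t, b, c)
}

// Hypothetical caller: runtime detection plus one `unsafe` call, since a safe
// #[target_feature] fn is still unsafe to call from a non-avx512f context.
#[cfg(target_arch = "x86_64")]
fn fmsub_if_supported(a: __m512, b: __m512, c: __m512) -> Option<__m512> {
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: avx512f was detected on the running CPU just above.
        Some(unsafe { fused_ops(a, b, c) })
    } else {
        None
    }
}

The turbofish form shown here always works; `#[rustc_legacy_const_generics(3)]` additionally keeps the Intel-style call shape with the rounding value as a trailing argument. The pattern in the diff itself is the same throughout: the public signature drops `unsafe`, and the body wraps the raw LLVM intrinsic call (and any `transmute`/`simd_*` use) in an inner `unsafe { ... }` block.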
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\ @@ -8163,13 +8792,11 @@ pub unsafe fn _mm512_mask3_fmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmsub_round_pd( - a: __m512d, - b: __m512d, - c: __m512d, -) -> __m512d { - static_assert_rounding!(ROUNDING); - vfmadd132pdround(a, b, simd_neg(c), ROUNDING) +pub fn _mm512_fmsub_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(a, b, simd_neg(c), ROUNDING) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8187,15 +8814,17 @@ pub unsafe fn _mm512_fmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmsub_round_pd( +pub fn _mm512_mask_fmsub_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8213,15 +8842,17 @@ pub unsafe fn _mm512_mask_fmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmsub_round_pd( +pub fn _mm512_maskz_fmsub_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_pd()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_pd()) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8239,15 +8870,17 @@ pub unsafe fn _mm512_maskz_fmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. 
clang generates fmadd, gcc generates fmsub #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmsub_round_pd( +pub fn _mm512_mask3_fmsub_round_pd( a: __m512d, b: __m512d, c: __m512d, k: __mmask8, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\ @@ -8265,13 +8898,11 @@ pub unsafe fn _mm512_mask3_fmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmaddsub_round_ps( - a: __m512, - b: __m512, - c: __m512, -) -> __m512 { - static_assert_rounding!(ROUNDING); - vfmaddsubpsround(a, b, c, ROUNDING) +pub fn _mm512_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddsubpsround(a, b, c, ROUNDING) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8289,14 +8920,16 @@ pub unsafe fn _mm512_fmaddsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmaddsub_round_ps( +pub fn _mm512_mask_fmaddsub_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8314,14 +8947,16 @@ pub unsafe fn _mm512_mask_fmaddsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmaddsub_round_ps( +pub fn _mm512_maskz_fmaddsub_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps()) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8339,14 +8974,16 @@ pub unsafe fn _mm512_maskz_fmaddsub_round_ps( 
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmaddsub_round_ps( +pub fn _mm512_mask3_fmaddsub_round_ps( a: __m512, b: __m512, c: __m512, k: __mmask16, ) -> __m512 { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\ @@ -8364,13 +9001,15 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmaddsub_round_pd( +pub fn _mm512_fmaddsub_round_pd( a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - vfmaddsubpdround(a, b, c, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddsubpdround(a, b, c, ROUNDING) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8388,14 +9027,16 @@ pub unsafe fn _mm512_fmaddsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmaddsub_round_pd( +pub fn _mm512_mask_fmaddsub_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8413,14 +9054,16 @@ pub unsafe fn _mm512_mask_fmaddsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmaddsub_round_pd( +pub fn _mm512_maskz_fmaddsub_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd()) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8438,14 +9081,16 @@ pub 
unsafe fn _mm512_maskz_fmaddsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmaddsub_round_pd( +pub fn _mm512_mask3_fmaddsub_round_pd( a: __m512d, b: __m512d, c: __m512d, k: __mmask8, ) -> __m512d { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\ @@ -8463,13 +9108,11 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmsubadd_round_ps( - a: __m512, - b: __m512, - c: __m512, -) -> __m512 { - static_assert_rounding!(ROUNDING); - vfmaddsubpsround(a, b, simd_neg(c), ROUNDING) +pub fn _mm512_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddsubpsround(a, b, simd_neg(c), ROUNDING) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8487,15 +9130,17 @@ pub unsafe fn _mm512_fmsubadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmsubadd_round_ps( +pub fn _mm512_mask_fmsubadd_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8513,15 +9158,17 @@ pub unsafe fn _mm512_mask_fmsubadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmsubadd_round_ps( +pub fn _mm512_maskz_fmsubadd_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_ps()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_ps()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed 
elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8539,15 +9186,17 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmsubadd_round_ps( +pub fn _mm512_mask3_fmsubadd_round_ps( a: __m512, b: __m512, c: __m512, k: __mmask16, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\ @@ -8565,13 +9214,15 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fmsubadd_round_pd( +pub fn _mm512_fmsubadd_round_pd( a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - vfmaddsubpdround(a, b, simd_neg(c), ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddsubpdround(a, b, simd_neg(c), ROUNDING) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8589,15 +9240,17 @@ pub unsafe fn _mm512_fmsubadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fmsubadd_round_pd( +pub fn _mm512_mask_fmsubadd_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8615,15 +9268,17 @@ pub unsafe fn _mm512_mask_fmsubadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fmsubadd_round_pd( +pub fn _mm512_maskz_fmsubadd_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_pd()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmaddsubpdround(a, b, 
simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_pd()) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8641,15 +9296,17 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fmsubadd_round_pd( +pub fn _mm512_mask3_fmsubadd_round_pd( a: __m512d, b: __m512d, c: __m512d, k: __mmask8, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\ @@ -8667,13 +9324,11 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fnmadd_round_ps( - a: __m512, - b: __m512, - c: __m512, -) -> __m512 { - static_assert_rounding!(ROUNDING); - vfmadd132psround(simd_neg(a), b, c, ROUNDING) +pub fn _mm512_fnmadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132psround(simd_neg(a), b, c, ROUNDING) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8691,15 +9346,17 @@ pub unsafe fn _mm512_fnmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fnmadd_round_ps( +pub fn _mm512_mask_fnmadd_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8717,15 +9374,17 @@ pub unsafe fn _mm512_mask_fnmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fnmadd_round_ps( +pub fn _mm512_maskz_fnmadd_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING); 
- simd_select_bitmask(k, r, _mm512_setzero_ps()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_ps()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8743,15 +9402,17 @@ pub unsafe fn _mm512_maskz_fnmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fnmadd_round_ps( +pub fn _mm512_mask3_fnmadd_round_ps( a: __m512, b: __m512, c: __m512, k: __mmask16, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\ @@ -8769,13 +9430,11 @@ pub unsafe fn _mm512_mask3_fnmadd_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fnmadd_round_pd( - a: __m512d, - b: __m512d, - c: __m512d, -) -> __m512d { - static_assert_rounding!(ROUNDING); - vfmadd132pdround(simd_neg(a), b, c, ROUNDING) +pub fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(simd_neg(a), b, c, ROUNDING) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8793,15 +9452,17 @@ pub unsafe fn _mm512_fnmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fnmadd_round_pd( +pub fn _mm512_mask_fnmadd_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8819,15 +9480,17 @@ pub unsafe fn _mm512_mask_fnmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fnmadd_round_pd( +pub fn _mm512_maskz_fnmadd_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512d, ) -> 
__m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_pd()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_pd()) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8845,15 +9508,17 @@ pub unsafe fn _mm512_maskz_fnmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fnmadd_round_pd( +pub fn _mm512_mask3_fnmadd_round_pd( a: __m512d, b: __m512d, c: __m512d, k: __mmask8, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\ @@ -8871,13 +9536,11 @@ pub unsafe fn _mm512_mask3_fnmadd_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fnmsub_round_ps( - a: __m512, - b: __m512, - c: __m512, -) -> __m512 { - static_assert_rounding!(ROUNDING); - vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING) +pub fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8895,15 +9558,17 @@ pub unsafe fn _mm512_fnmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fnmsub_round_ps( +pub fn _mm512_mask_fnmsub_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -8921,15 +9586,17 @@ pub unsafe fn _mm512_mask_fnmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps 
#[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fnmsub_round_ps( +pub fn _mm512_maskz_fnmsub_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_ps()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_ps()) + } } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -8947,15 +9614,17 @@ pub unsafe fn _mm512_maskz_fnmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fnmsub_round_ps( +pub fn _mm512_mask3_fnmsub_round_ps( a: __m512, b: __m512, c: __m512, k: __mmask16, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\ @@ -8973,13 +9642,11 @@ pub unsafe fn _mm512_mask3_fnmsub_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_fnmsub_round_pd( - a: __m512d, - b: __m512d, - c: __m512d, -) -> __m512d { - static_assert_rounding!(ROUNDING); - vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING) +pub fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\ @@ -8997,15 +9664,17 @@ pub unsafe fn _mm512_fnmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_fnmsub_round_pd( +pub fn _mm512_mask_fnmsub_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, a) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, a) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9023,15 +9692,17 @@ 
pub unsafe fn _mm512_mask_fnmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_maskz_fnmsub_round_pd( +pub fn _mm512_maskz_fnmsub_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, _mm512_setzero_pd()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, _mm512_setzero_pd()) + } } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\ @@ -9049,15 +9720,17 @@ pub unsafe fn _mm512_maskz_fnmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask3_fnmsub_round_pd( +pub fn _mm512_mask3_fnmsub_round_pd( a: __m512d, b: __m512d, c: __m512d, k: __mmask8, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); - simd_select_bitmask(k, r, c) + unsafe { + static_assert_rounding!(ROUNDING); + let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING); + simd_select_bitmask(k, r, c) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\ @@ -9069,12 +9742,14 @@ pub unsafe fn _mm512_mask3_fnmsub_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vmaxps(a, b, SAE); - transmute(r) +pub fn _mm512_max_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmaxps(a, b, SAE); + transmute(r) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9086,17 +9761,19 @@ pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512) -> __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_max_round_ps( +pub fn _mm512_mask_max_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vmaxps(a, b, SAE); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmaxps(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9108,16 
+9785,14 @@ pub unsafe fn _mm512_mask_max_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxps, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_max_round_ps( - k: __mmask16, - a: __m512, - b: __m512, -) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vmaxps(a, b, SAE); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) +pub fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vmaxps(a, b, SAE); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\ @@ -9129,12 +9804,14 @@ pub unsafe fn _mm512_maskz_max_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vmaxpd(a, b, SAE); - transmute(r) +pub fn _mm512_max_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmaxpd(a, b, SAE); + transmute(r) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9146,17 +9823,19 @@ pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_max_round_pd( +pub fn _mm512_mask_max_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vmaxpd(a, b, SAE); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmaxpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9168,16 +9847,14 @@ pub unsafe fn _mm512_mask_max_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_max_round_pd( - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vmaxpd(a, b, SAE); - transmute(simd_select_bitmask(k, r, f64x8::ZERO)) +pub fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vmaxpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\ @@ -9189,12 +9866,14 @@ pub unsafe fn _mm512_maskz_max_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] 
#[cfg_attr(test, assert_instr(vminps, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vminps(a, b, SAE); - transmute(r) +pub fn _mm512_min_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vminps(a, b, SAE); + transmute(r) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9206,17 +9885,19 @@ pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512) -> __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_min_round_ps( +pub fn _mm512_mask_min_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vminps(a, b, SAE); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vminps(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9228,16 +9909,14 @@ pub unsafe fn _mm512_mask_min_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminps, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_min_round_ps( - k: __mmask16, - a: __m512, - b: __m512, -) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vminps(a, b, SAE); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) +pub fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vminps(a, b, SAE); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\ @@ -9249,12 +9928,14 @@ pub unsafe fn _mm512_maskz_min_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vminpd(a, b, SAE); - transmute(r) +pub fn _mm512_min_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vminpd(a, b, SAE); + transmute(r) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9266,17 +9947,19 @@ pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_min_round_pd( +pub fn 
_mm512_mask_min_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vminpd(a, b, SAE); - transmute(simd_select_bitmask(k, r, src.as_f64x8())) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vminpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, src.as_f64x8())) + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9288,16 +9971,14 @@ pub unsafe fn _mm512_mask_min_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminpd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_min_round_pd( - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vminpd(a, b, SAE); - transmute(simd_select_bitmask(k, r, f64x8::ZERO)) +pub fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vminpd(a, b, SAE); + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) + } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9309,11 +9990,13 @@ pub unsafe fn _mm512_maskz_min_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_getexp_round_ps(a: __m512) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE); - transmute(r) +pub fn _mm512_getexp_round_ps(a: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9325,16 +10008,14 @@ pub unsafe fn _mm512_getexp_round_ps(a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_getexp_round_ps( - src: __m512, - k: __mmask16, - a: __m512, -) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_f32x16(); - let r = vgetexpps(a, src, k, SAE); - transmute(r) +pub fn _mm512_mask_getexp_round_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vgetexpps(a, src, k, SAE); + transmute(r) + } } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9346,11 +10027,13 @@ pub unsafe fn _mm512_mask_getexp_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vgetexpps(a, f32x16::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vgetexpps(a, f32x16::ZERO, k, SAE); + transmute(r) + } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9362,11 +10045,13 @@ pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m5 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_getexp_round_pd(a: __m512d) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE); - transmute(r) +pub fn _mm512_getexp_round_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9378,16 +10063,18 @@ pub unsafe fn _mm512_getexp_round_pd(a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_getexp_round_pd( +pub fn _mm512_mask_getexp_round_pd( src: __m512d, k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let src = src.as_f64x8(); - let r = vgetexppd(a, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vgetexppd(a, src, k, SAE); + transmute(r) + } } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9399,11 +10086,13 @@ pub unsafe fn _mm512_mask_getexp_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let r = vgetexppd(a, f64x8::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vgetexppd(a, f64x8::ZERO, k, SAE); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -9421,12 +10110,14 @@ pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm512_roundscale_round_ps(a: __m512) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE); - transmute(r) +pub fn _mm512_roundscale_round_ps(a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9444,17 +10135,19 @@ pub unsafe fn _mm512_roundscale_round_ps(a: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_mask_roundscale_round_ps( +pub fn _mm512_mask_roundscale_round_ps( src: __m512, k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_f32x16(); - let r = vrndscaleps(a, IMM8, src, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let src = 
src.as_f32x16(); + let r = vrndscaleps(a, IMM8, src, k, SAE); + transmute(r) + } } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9472,15 +10165,17 @@ pub unsafe fn _mm512_mask_roundscale_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm512_maskz_roundscale_round_ps( +pub fn _mm512_maskz_roundscale_round_ps( k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -9498,12 +10193,14 @@ pub unsafe fn _mm512_maskz_roundscale_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2)] -pub unsafe fn _mm512_roundscale_round_pd(a: __m512d) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE); - transmute(r) +pub fn _mm512_roundscale_round_pd(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9521,17 +10218,19 @@ pub unsafe fn _mm512_roundscale_round_pd(a: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_mask_roundscale_round_pd( +pub fn _mm512_mask_roundscale_round_pd( src: __m512d, k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let src = src.as_f64x8(); - let r = vrndscalepd(a, IMM8, src, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vrndscalepd(a, IMM8, src, k, SAE); + transmute(r) + } } /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9549,15 +10248,17 @@ pub unsafe fn _mm512_mask_roundscale_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm512_maskz_roundscale_round_pd( +pub fn _mm512_maskz_roundscale_round_pd( k: __mmask8, a: __m512d, ) -> __m512d { - 
static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\ @@ -9575,12 +10276,14 @@ pub unsafe fn _mm512_maskz_roundscale_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING); - transmute(r) +pub fn _mm512_scalef_round_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9598,18 +10301,20 @@ pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_scalef_round_ps( +pub fn _mm512_mask_scalef_round_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let src = src.as_f32x16(); - let r = vscalefps(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let src = src.as_f32x16(); + let r = vscalefps(a, b, src, k, ROUNDING); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9627,16 +10332,18 @@ pub unsafe fn _mm512_mask_scalef_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_scalef_round_ps( +pub fn _mm512_maskz_scalef_round_ps( k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING); + transmute(r) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\ @@ -9654,12 +10361,14 @@ pub unsafe fn _mm512_maskz_scalef_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = 
a.as_f64x8(); - let b = b.as_f64x8(); - let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn _mm512_scalef_round_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -9677,18 +10386,20 @@ pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_scalef_round_pd( +pub fn _mm512_mask_scalef_round_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let src = src.as_f64x8(); - let r = vscalefpd(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let src = src.as_f64x8(); + let r = vscalefpd(a, b, src, k, ROUNDING); + transmute(r) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -9706,16 +10417,18 @@ pub unsafe fn _mm512_mask_scalef_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_scalef_round_pd( +pub fn _mm512_maskz_scalef_round_pd( k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\ @@ -9727,18 +10440,20 @@ pub unsafe fn _mm512_maskz_scalef_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_fixupimm_round_ps( +pub fn _mm512_fixupimm_round_ps( a: __m512, b: __m512, c: __m512i, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let c = c.as_i32x16(); - let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
imm8 is used to set the required flags reporting.\ @@ -9750,19 +10465,21 @@ pub unsafe fn _mm512_fixupimm_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm512_mask_fixupimm_round_ps( +pub fn _mm512_mask_fixupimm_round_ps( a: __m512, k: __mmask16, b: __m512, c: __m512i, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let c = c.as_i32x16(); - let r = vfixupimmps(a, b, c, IMM8, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmps(a, b, c, IMM8, k, SAE); + transmute(r) + } } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\ @@ -9774,19 +10491,21 @@ pub unsafe fn _mm512_mask_fixupimm_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm512_maskz_fixupimm_round_ps( +pub fn _mm512_maskz_fixupimm_round_ps( k: __mmask16, a: __m512, b: __m512, c: __m512i, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let c = c.as_i32x16(); - let r = vfixupimmpsz(a, b, c, IMM8, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let c = c.as_i32x16(); + let r = vfixupimmpsz(a, b, c, IMM8, k, SAE); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\ @@ -9798,18 +10517,20 @@ pub unsafe fn _mm512_maskz_fixupimm_round_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_fixupimm_round_pd( +pub fn _mm512_fixupimm_round_pd( a: __m512d, b: __m512d, c: __m512i, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let c = c.as_i64x8(); - let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
imm8 is used to set the required flags reporting.\ @@ -9821,19 +10542,21 @@ pub unsafe fn _mm512_fixupimm_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm512_mask_fixupimm_round_pd( +pub fn _mm512_mask_fixupimm_round_pd( a: __m512d, k: __mmask8, b: __m512d, c: __m512i, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let c = c.as_i64x8(); - let r = vfixupimmpd(a, b, c, IMM8, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpd(a, b, c, IMM8, k, SAE); + transmute(r) + } } /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\ @@ -9845,19 +10568,21 @@ pub unsafe fn _mm512_mask_fixupimm_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm512_maskz_fixupimm_round_pd( +pub fn _mm512_maskz_fixupimm_round_pd( k: __mmask8, a: __m512d, b: __m512d, c: __m512i, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let c = c.as_i64x8(); - let r = vfixupimmpdz(a, b, c, IMM8, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let c = c.as_i64x8(); + let r = vfixupimmpdz(a, b, c, IMM8, k, SAE); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -9878,19 +10603,21 @@ pub unsafe fn _mm512_maskz_fixupimm_round_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(1, 2, 3)] -pub unsafe fn _mm512_getmant_round_ps< +pub fn _mm512_getmant_round_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, >( a: __m512, ) -> __m512 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -9911,7 +10638,7 @@ pub unsafe fn _mm512_getmant_round_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4, 5)] -pub unsafe fn _mm512_mask_getmant_round_ps< +pub fn _mm512_mask_getmant_round_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9920,13 +10647,15 @@ pub unsafe fn _mm512_mask_getmant_round_ps< k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_f32x16(); + let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE); + transmute(r) + } } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -9947,7 +10676,7 @@ pub unsafe fn _mm512_mask_getmant_round_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3, 4)] -pub unsafe fn _mm512_maskz_getmant_round_ps< +pub fn _mm512_maskz_getmant_round_ps< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9955,12 +10684,14 @@ pub unsafe fn _mm512_maskz_getmant_round_ps< k: __mmask16, a: __m512, ) -> __m512 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -9981,19 +10712,21 @@ pub unsafe fn _mm512_maskz_getmant_round_ps< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(1, 2, 3)] -pub unsafe fn _mm512_getmant_round_pd< +pub fn _mm512_getmant_round_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, >( a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -10014,7 +10747,7 @@ pub unsafe fn _mm512_getmant_round_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4, 5)] -pub unsafe fn _mm512_mask_getmant_round_pd< +pub fn _mm512_mask_getmant_round_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -10023,13 +10756,15 @@ pub unsafe fn _mm512_mask_getmant_round_pd< k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let src = src.as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_f64x8(); + let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE); + transmute(r) + } } /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -10050,7 +10785,7 @@ pub unsafe fn _mm512_mask_getmant_round_pd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3, 4)] -pub unsafe fn _mm512_maskz_getmant_round_pd< +pub fn _mm512_maskz_getmant_round_pd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -10058,28 +10793,32 @@ pub unsafe fn _mm512_maskz_getmant_round_pd< k: __mmask8, a: __m512d, ) -> __m512d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. /// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737) +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i { - transmute(vcvtps2dq( - a.as_f32x16(), - i32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2dq( + a.as_f32x16(), + i32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10089,13 +10828,15 @@ pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { - transmute(vcvtps2dq( - a.as_f32x16(), - src.as_i32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2dq( + a.as_f32x16(), + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
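To illustrate what the `unsafe fn` to safe `fn` change above means at the call site, here is a small editorial sketch that is not part of the patch; the wrapper name `cvt_and_getexp` and the nightly feature gates are assumptions, while the intrinsic signatures come from the diff itself.

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

// A safe caller that enables the same target feature can now call the
// converted intrinsics without wrapping each call in `unsafe`.
#[target_feature(enable = "avx512f")]
fn cvt_and_getexp(a: __m512) -> (__m512i, __m512) {
    let ints = _mm512_cvtps_epi32(a); // rounds with the current MXCSR mode
    let exps = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a); // SAE passed as a const generic
    (ints, exps)
}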
@@ -10105,13 +10846,15 @@ pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i { - transmute(vcvtps2dq( - a.as_f32x16(), - i32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2dq( + a.as_f32x16(), + i32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10121,9 +10864,11 @@ pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { - let convert = _mm256_cvtps_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8())) +pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { + let convert = _mm256_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8())) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10133,9 +10878,11 @@ pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i { - let convert = _mm256_cvtps_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO)) +pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i { + unsafe { + let convert = _mm256_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10145,9 +10892,11 @@ pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - let convert = _mm_cvtps_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) +pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { + let convert = _mm_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
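A hedged sketch of the writemask versus zeromask behaviour documented above, using the 128-bit forms from this hunk; `demo_mask_vs_maskz` is an invented name and the nightly feature gates are assumptions.

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

// The 128-bit (VL) forms require both avx512f and avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
fn demo_mask_vs_maskz(src: __m128i, a: __m128) -> (__m128i, __m128i) {
    let k: __mmask8 = 0b0101; // write lanes 0 and 2 only
    let merged = _mm_mask_cvtps_epi32(src, k, a); // lanes 1 and 3 copied from src
    let zeroed = _mm_maskz_cvtps_epi32(k, a); // lanes 1 and 3 forced to zero
    (merged, zeroed)
}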
@@ -10157,123 +10906,131 @@ pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq))] -pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i { - let convert = _mm_cvtps_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) +pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i { + unsafe { + let convert = _mm_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i { - transmute(vcvtps2udq( - a.as_f32x16(), - u32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2udq( + a.as_f32x16(), + u32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { - transmute(vcvtps2udq( - a.as_f32x16(), - src.as_u32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2udq( + a.as_f32x16(), + src.as_u32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i { - transmute(vcvtps2udq( - a.as_f32x16(), - u32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvtps2udq( + a.as_f32x16(), + u32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i { - transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) +pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i { + unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { - transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) +pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i { - transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) +pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i { - transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) +pub fn _mm_cvtps_epu32(a: __m128) -> __m128i { + unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) +pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] -pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { - transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) +pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -10283,13 +11040,15 @@ pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd))] -pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d { - transmute(vcvtps2pd( - a.as_f32x8(), - f64x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtps_pd(a: __m256) -> __m512d { + unsafe { + transmute(vcvtps2pd( + a.as_f32x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10299,13 +11058,15 @@ pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd))] -pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d { - transmute(vcvtps2pd( - a.as_f32x8(), - src.as_f64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d { + unsafe { + transmute(vcvtps2pd( + a.as_f32x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
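As a usage sketch for the widening conversion above (editorial, not part of the patch; the helper name is invented): `_mm512_cvtps_pd` takes eight `f32` lanes in a `__m256` and yields eight `f64` lanes in a `__m512d`, and the masked form falls back to the `src` lanes exactly as the doc comment states.

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn widen_f32_to_f64(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    // Unmasked form converts every lane; masked form keeps `src` where k is 0.
    let _all = _mm512_cvtps_pd(a);
    _mm512_mask_cvtps_pd(src, k, a)
}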
@@ -10315,13 +11076,15 @@ pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd))] -pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d { - transmute(vcvtps2pd( - a.as_f32x8(), - f64x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d { + unsafe { + transmute(vcvtps2pd( + a.as_f32x8(), + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst. @@ -10331,13 +11094,15 @@ pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd))] -pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d { - transmute(vcvtps2pd( - _mm512_castps512_ps256(v2).as_f32x8(), - f64x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d { + unsafe { + transmute(vcvtps2pd( + _mm512_castps512_ps256(v2).as_f32x8(), + f64x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10347,13 +11112,15 @@ pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd))] -pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d { - transmute(vcvtps2pd( - _mm512_castps512_ps256(v2).as_f32x8(), - src.as_f64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d { + unsafe { + transmute(vcvtps2pd( + _mm512_castps512_ps256(v2).as_f32x8(), + src.as_f64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. @@ -10363,13 +11130,15 @@ pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 { - transmute(vcvtpd2ps( - a.as_f64x8(), - f32x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 { + unsafe { + transmute(vcvtpd2ps( + a.as_f64x8(), + f32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
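A short editorial sketch of the `cvtpslo` form above (the function name `lo_to_pd` is invented): it converts only the lower eight single-precision lanes of a `__m512`, which the implementation in this hunk spells as a cast to `__m256` followed by the ordinary widening conversion.

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn lo_to_pd(v2: __m512) -> __m512d {
    // Per the implementation above, this is equivalent to
    // _mm512_cvtps_pd(_mm512_castps512_ps256(v2)).
    _mm512_cvtpslo_pd(v2)
}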
@@ -10379,13 +11148,15 @@ pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 { - transmute(vcvtpd2ps( - a.as_f64x8(), - src.as_f32x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 { + unsafe { + transmute(vcvtpd2ps( + a.as_f64x8(), + src.as_f32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10395,13 +11166,15 @@ pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m2 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 { - transmute(vcvtpd2ps( - a.as_f64x8(), - f32x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 { + unsafe { + transmute(vcvtpd2ps( + a.as_f64x8(), + f32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10411,9 +11184,11 @@ pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 { - let convert = _mm256_cvtpd_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) +pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 { + unsafe { + let convert = _mm256_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10423,9 +11198,11 @@ pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 { - let convert = _mm256_cvtpd_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) +pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 { + unsafe { + let convert = _mm256_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
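The VL-width masked conversions above are implemented as "convert every lane, then blend with the mask". The following scalar model of that `simd_select_bitmask` blend is an editorial sketch in plain Rust, not code from the crate.

// Lane i of the result comes from `converted` when bit i of `k` is set,
// otherwise from `src` (writemask) or from zero (zeromask).
fn select_bitmask(k: u8, converted: [f32; 4], src: [f32; 4]) -> [f32; 4] {
    let mut out = [0.0f32; 4];
    for i in 0..4 {
        out[i] = if (k >> i) & 1 == 1 { converted[i] } else { src[i] };
    }
    out
}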
@@ -10435,9 +11212,11 @@ pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 { - let convert = _mm_cvtpd_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) +pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 { + unsafe { + let convert = _mm_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10447,9 +11226,11 @@ pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { - let convert = _mm_cvtpd_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { + unsafe { + let convert = _mm_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. @@ -10459,13 +11240,15 @@ pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i { - transmute(vcvtpd2dq( - a.as_f64x8(), - i32x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2dq( + a.as_f64x8(), + i32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10475,13 +11258,15 @@ pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvtpd2dq( - a.as_f64x8(), - src.as_i32x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2dq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
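A hedged usage sketch for the double-to-integer conversions above (the wrapper name is invented; nightly feature gates assumed): `_MM_FROUND_CUR_DIRECTION` makes the conversion honour the MXCSR rounding mode, which defaults to round-to-nearest-even.

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn pd_to_i32(a: __m512d) -> __m256i {
    // Under the default rounding mode, 2.5 converts to 2 and 3.5 to 4.
    _mm512_cvtpd_epi32(a)
}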
@@ -10491,13 +11276,15 @@ pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvtpd2dq( - a.as_f64x8(), - i32x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2dq( + a.as_f64x8(), + i32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10507,9 +11294,11 @@ pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { - let convert = _mm256_cvtpd_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) +pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { + let convert = _mm256_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10519,9 +11308,11 @@ pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i { - let convert = _mm256_cvtpd_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) +pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { + let convert = _mm256_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10531,9 +11322,11 @@ pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - let convert = _mm_cvtpd_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) +pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { + let convert = _mm_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
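The 128/256-bit masked forms above are gated on both `avx512f` and `avx512vl`; the sketch below (invented names, nightly feature gates assumed) shows one way a caller without `#[target_feature]` can still reach them, discharging the remaining `unsafe` obligation with runtime detection.

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn maskz_narrow(k: __mmask8, a: __m256d) -> __m128i {
    _mm256_maskz_cvtpd_epi32(k, a)
}

fn main() {
    if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
        // Safe #[target_feature] functions still need `unsafe` from callers that
        // do not enable those features; the detection above justifies the promise
        // (AVX-512VL hardware also supports AVX, covering `_mm256_set1_pd`).
        let _r = unsafe { maskz_narrow(0b0011, _mm256_set1_pd(1.25)) };
    }
}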
@@ -10543,9 +11336,11 @@ pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] -pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { - let convert = _mm_cvtpd_epi32(a); - transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) +pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { + let convert = _mm_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. @@ -10555,13 +11350,15 @@ pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i { - transmute(vcvtpd2udq( - a.as_f64x8(), - u32x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2udq( + a.as_f64x8(), + u32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10571,13 +11368,15 @@ pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvtpd2udq( - a.as_f64x8(), - src.as_u32x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2udq( + a.as_f64x8(), + src.as_u32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10587,13 +11386,15 @@ pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvtpd2udq( - a.as_f64x8(), - u32x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvtpd2udq( + a.as_f64x8(), + u32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. 
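The `_MM_FROUND_CUR_DIRECTION` argument threaded through the converters above tells the hardware to use the current MXCSR rounding mode, which defaults to round-to-nearest-even; that is not the same as Rust's truncating `as` cast. A small standalone illustration (my example, not part of the patch):

```rust
fn main() {
    // Rust's `as` truncates toward zero; the default x86 rounding mode
    // (what _MM_FROUND_CUR_DIRECTION normally resolves to) rounds ties to even.
    assert_eq!(2.5f64 as i32, 2);
    assert_eq!(2.5f64.round_ties_even() as i32, 2); // tie goes to the even value
    assert_eq!(3.5f64 as i32, 3);
    assert_eq!(3.5f64.round_ties_even() as i32, 4); // tie goes to the even value
}
```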
@@ -10603,8 +11404,8 @@ pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i { - transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) +pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10614,8 +11415,8 @@ pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) +pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10625,8 +11426,8 @@ pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) +pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. @@ -10636,8 +11437,8 @@ pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i { - transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) +pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10647,8 +11448,8 @@ pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) +pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10658,8 +11459,8 @@ pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] -pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) +pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) } } /// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. @@ -10669,18 +11470,20 @@ pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 { - let r: f32x8 = vcvtpd2ps( - v2.as_f64x8(), - f32x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - ); - simd_shuffle!( - r, - f32x8::ZERO, - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], - ) +pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 { + unsafe { + let r: f32x8 = vcvtpd2ps( + v2.as_f64x8(), + f32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + ); + simd_shuffle!( + r, + f32x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } } /// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. 
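`_mm512_cvtpd_pslo` above narrows the eight f64 lanes of `v2` and parks them in the low half of a 16-lane f32 vector; every shuffle index in the 8..=15 range selects from the appended zero vector, so the upper half is forced to 0. A scalar sketch of that behaviour, covering both it and the masked variant that follows (the helper name is mine):

```rust
/// Scalar model of the convert-then-zero-upper-half pattern used by
/// `_mm512_cvtpd_pslo` (and, after masking, by `_mm512_mask_cvtpd_pslo`).
fn cvtpd_pslo_model(v2: [f64; 8]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..8 {
        dst[i] = v2[i] as f32; // low half: narrowed elements
    }
    dst // high half stays 0.0, mirroring the zero-vector shuffle operands
}

fn main() {
    let r = cvtpd_pslo_model([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
    assert_eq!(&r[..8], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
    assert!(r[8..].iter().all(|&x| x == 0.0));
}
```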
@@ -10690,18 +11493,20 @@ pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] -pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 { - let r: f32x8 = vcvtpd2ps( - v2.as_f64x8(), - _mm512_castps512_ps256(src).as_f32x8(), - k, - _MM_FROUND_CUR_DIRECTION, - ); - simd_shuffle!( - r, - f32x8::ZERO, - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], - ) +pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 { + unsafe { + let r: f32x8 = vcvtpd2ps( + v2.as_f64x8(), + _mm512_castps512_ps256(src).as_f32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + ); + simd_shuffle!( + r, + f32x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst. @@ -10711,9 +11516,11 @@ pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i { - let a = a.as_i8x16(); - transmute::<i32x16, _>(simd_cast(a)) +pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i8x16(); + transmute::<i32x16, _>(simd_cast(a)) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10723,9 +11530,11 @@ pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { - let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, src.as_i32x16())) +pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10735,9 +11544,11 @@ pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i { - let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) +pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10747,9 +11558,11 @@ pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10759,9 +11572,11 @@ pub unsafe fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) +pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10771,9 +11586,11 @@ pub unsafe fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi8_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10783,9 +11600,11 @@ pub unsafe fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbd))] -pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi8_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) +pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } } /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst. 
@@ -10795,10 +11614,12 @@ pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i { - let a = a.as_i8x16(); - let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute::<i64x8, _>(simd_cast(v64)) +pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i8x16(); + let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::<i64x8, _>(simd_cast(v64)) + } } /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10808,9 +11629,11 @@ pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, src.as_i64x8())) +pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } } /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10820,9 +11643,11 @@ pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) +pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } } /// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10832,9 +11657,11 @@ pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } } /// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
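For the 8-bit to 64-bit widenings such as `_mm512_cvtepi8_epi64` above, only 8 of the 16 source bytes fit in the 512-bit result, so the `simd_shuffle!` keeps lanes 0..=7 before the widening cast sign-extends them. A scalar sketch (the helper name is mine):

```rust
/// Scalar model of `_mm512_cvtepi8_epi64`: drop the high 8 source bytes,
/// then sign-extend each remaining byte to 64 bits.
fn cvtepi8_epi64_model(a: [i8; 16]) -> [i64; 8] {
    let mut dst = [0i64; 8];
    for i in 0..8 {
        dst[i] = a[i] as i64; // `as` on a signed source sign-extends
    }
    dst
}

fn main() {
    let mut a = [0i8; 16];
    a[0] = -1;
    a[7] = 127;
    a[15] = 99; // ignored: outside the low 8 bytes
    let r = cvtepi8_epi64_model(a);
    assert_eq!(r[0], -1);
    assert_eq!(r[7], 127);
    assert_eq!(r.len(), 8);
}
```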
@@ -10844,9 +11671,11 @@ pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) +pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } } /// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10856,9 +11685,11 @@ pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi8_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } } /// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10868,9 +11699,11 @@ pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxbq))] -pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi8_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) +pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst. @@ -10880,9 +11713,11 @@ pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i { - let a = a.as_u8x16(); - transmute::<i32x16, _>(simd_cast(a)) +pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i { + unsafe { + let a = a.as_u8x16(); + transmute::<i32x16, _>(simd_cast(a)) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10892,9 +11727,11 @@ pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { - let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, src.as_i32x16())) +pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } } /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10904,9 +11741,11 @@ pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i { - let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) +pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10916,9 +11755,11 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } } /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10928,9 +11769,11 @@ pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) +pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10940,9 +11783,11 @@ pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu8_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } } /// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -10952,9 +11797,11 @@ pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbd))] -pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu8_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) +pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst. @@ -10964,10 +11811,12 @@ pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i { - let a = a.as_u8x16(); - let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute::<i64x8, _>(simd_cast(v64)) +pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_u8x16(); + let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::<i64x8, _>(simd_cast(v64)) + } } /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10977,9 +11826,11 @@ pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, src.as_i64x8())) +pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } } /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
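The `cvtepu*` helpers above reinterpret their input as unsigned lanes (`as_u8x16()` and friends) before the cast precisely so that the widening zero-extends rather than sign-extends. A scalar illustration of the difference:

```rust
/// One byte, widened both ways: unsigned sources zero-extend, signed
/// sources sign-extend (this mirrors the cvtepu* / cvtepi* split above).
fn widen_byte(b: u8) -> (i32, i32) {
    let zero_extended = b as i32; // cvtepu8_epi32-style lane
    let sign_extended = (b as i8) as i32; // cvtepi8_epi32-style lane
    (zero_extended, sign_extended)
}

fn main() {
    // 0xF0 is 240 as an unsigned byte but -16 when reinterpreted as signed.
    assert_eq!(widen_byte(0xF0), (240, -16));
}
```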
@@ -10989,9 +11840,11 @@ pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) +pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11001,9 +11854,11 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } } /// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11013,9 +11868,11 @@ pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) +pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } } /// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11025,9 +11882,11 @@ pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu8_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } } /// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -11037,9 +11896,11 @@ pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxbq))] -pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu8_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) +pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst. @@ -11049,9 +11910,11 @@ pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i { - let a = a.as_i16x16(); - transmute::<i32x16, _>(simd_cast(a)) +pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i16x16(); + transmute::<i32x16, _>(simd_cast(a)) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11061,9 +11924,11 @@ pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { - let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, src.as_i32x16())) +pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11073,9 +11938,11 @@ pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i { - let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) +pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11085,9 +11952,11 @@ pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11097,9 +11966,11 @@ pub unsafe fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) +pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11109,9 +11980,11 @@ pub unsafe fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi16_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11121,9 +11994,11 @@ pub unsafe fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwd))] -pub unsafe fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi16_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) +pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst. 
@@ -11133,9 +12008,11 @@ pub unsafe fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i { - let a = a.as_i16x8(); - transmute::<i64x8, _>(simd_cast(a)) +pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i16x8(); + transmute::<i64x8, _>(simd_cast(a)) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11145,9 +12022,11 @@ pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, src.as_i64x8())) +pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11157,9 +12036,11 @@ pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) +pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11169,9 +12050,11 @@ pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -11181,9 +12064,11 @@ pub unsafe fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) +pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11193,9 +12078,11 @@ pub unsafe fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi16_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11205,9 +12092,11 @@ pub unsafe fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxwq))] -pub unsafe fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi16_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) +pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst. @@ -11217,9 +12106,11 @@ pub unsafe fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i { - let a = a.as_u16x16(); - transmute::<i32x16, _>(simd_cast(a)) +pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i { + unsafe { + let a = a.as_u16x16(); + transmute::<i32x16, _>(simd_cast(a)) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11229,9 +12120,11 @@ pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { - let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, src.as_i32x16())) +pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, src.as_i32x16())) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11241,9 +12134,11 @@ pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i { - let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) +pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11253,9 +12148,11 @@ pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11265,9 +12162,11 @@ pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) +pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11277,9 +12176,11 @@ pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu16_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11289,9 +12190,11 @@ pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwd))] -pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu16_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) +pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst. @@ -11301,9 +12204,11 @@ pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i { - let a = a.as_u16x8(); - transmute::<i64x8, _>(simd_cast(a)) +pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i { + unsafe { + let a = a.as_u16x8(); + transmute::<i64x8, _>(simd_cast(a)) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11313,9 +12218,11 @@ pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, src.as_i64x8())) +pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } } /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -11325,9 +12232,11 @@ pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i { - let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) +pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } } /// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11337,9 +12246,11 @@ pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } } /// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11349,9 +12260,11 @@ pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) +pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } } /// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11361,9 +12274,11 @@ pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu16_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } } /// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -11373,9 +12288,11 @@ pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxwq))] -pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu16_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) +pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst. @@ -11385,9 +12302,11 @@ pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i { - let a = a.as_i32x8(); - transmute::<i64x8, _>(simd_cast(a)) +pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i { + unsafe { + let a = a.as_i32x8(); + transmute::<i64x8, _>(simd_cast(a)) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11397,9 +12316,11 @@ pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { - let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, src.as_i64x8())) +pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11409,9 +12330,11 @@ pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i { - let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) +pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11421,9 +12344,11 @@ pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11433,9 +12358,11 @@ pub unsafe fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) +pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11445,9 +12372,11 @@ pub unsafe fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi32_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11457,9 +12386,11 @@ pub unsafe fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsxdq))] -pub unsafe fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepi32_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) +pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepi32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst. 
@@ -11469,9 +12400,11 @@ pub unsafe fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i { - let a = a.as_u32x8(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i { + unsafe { + let a = a.as_u32x8(); + transmute::(simd_cast(a)) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11481,9 +12414,11 @@ pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { - let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, src.as_i64x8())) +pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, src.as_i64x8())) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11493,9 +12428,11 @@ pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i { - let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) +pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11505,9 +12442,11 @@ pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
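// Illustrative sketch, not part of the patch: the per-lane difference between
// the vpmovsxdq hunks (sign extension, `simd_cast` on `i32x8`) and the
// vpmovzxdq hunks above (zero extension, `simd_cast` on `u32x8`), modelled on
// scalars. Function names are illustrative only.
fn sign_extend_lane(x: i32) -> i64 {
    x as i64 // -1i32 stays -1i64
}

fn zero_extend_lane(x: u32) -> i64 {
    x as i64 // 0xFFFF_FFFFu32 becomes 4_294_967_295i64
}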
@@ -11517,9 +12456,11 @@ pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i { - let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) +pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11529,9 +12470,11 @@ pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu32_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) + } } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11541,9 +12484,11 @@ pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovzxdq))] -pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i { - let convert = _mm_cvtepu32_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) +pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let convert = _mm_cvtepu32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. @@ -11553,9 +12498,11 @@ pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 { - let a = a.as_i32x16(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 { + unsafe { + let a = a.as_i32x16(); + transmute::(simd_cast(a)) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11565,9 +12512,11 @@ pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 { - let convert = _mm512_cvtepi32_ps(a).as_f32x16(); - transmute(simd_select_bitmask(k, convert, src.as_f32x16())) +pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepi32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, src.as_f32x16())) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11577,9 +12526,11 @@ pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 { - let convert = _mm512_cvtepi32_ps(a).as_f32x16(); - transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) +pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepi32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11589,9 +12540,11 @@ pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 { - let convert = _mm256_cvtepi32_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, convert, src.as_f32x8())) +pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 { + unsafe { + let convert = _mm256_cvtepi32_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, convert, src.as_f32x8())) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11601,9 +12554,11 @@ pub unsafe fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 { - let convert = _mm256_cvtepi32_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, convert, f32x8::ZERO)) +pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 { + unsafe { + let convert = _mm256_cvtepi32_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, convert, f32x8::ZERO)) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
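// Illustrative sketch, not part of the patch: per lane the vcvtdq2ps hunks
// above amount to `x as f32`. Because f32 carries a 24-bit significand,
// integers with magnitude above 2^24 may be rounded; with the default MXCSR
// rounding mode (nearest-even) the hardware result should match Rust's `as`
// cast. Names are illustrative only.
fn cvtepi32_ps_lane(x: i32) -> f32 {
    x as f32
}

#[test]
fn rounding_kicks_in_above_2_pow_24() {
    assert_eq!(cvtepi32_ps_lane(16_777_216), 16_777_216.0); // 2^24, exact
    assert_eq!(cvtepi32_ps_lane(16_777_217), 16_777_216.0); // rounded to even
}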
@@ -11613,9 +12568,11 @@ pub unsafe fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { - let convert = _mm_cvtepi32_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, convert, src.as_f32x4())) +pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtepi32_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, convert, src.as_f32x4())) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11625,9 +12582,11 @@ pub unsafe fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] -pub unsafe fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 { - let convert = _mm_cvtepi32_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, convert, f32x4::ZERO)) +pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtepi32_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, convert, f32x4::ZERO)) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -11637,9 +12596,11 @@ pub unsafe fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d { - let a = a.as_i32x8(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d { + unsafe { + let a = a.as_i32x8(); + transmute::(simd_cast(a)) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11649,9 +12610,11 @@ pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d { - let convert = _mm512_cvtepi32_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, convert, src.as_f64x8())) +pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d { + unsafe { + let convert = _mm512_cvtepi32_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, convert, src.as_f64x8())) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -11661,9 +12624,11 @@ pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d { - let convert = _mm512_cvtepi32_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) +pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d { + unsafe { + let convert = _mm512_cvtepi32_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11673,9 +12638,11 @@ pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d { - let convert = _mm256_cvtepi32_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, convert, src.as_f64x4())) +pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d { + unsafe { + let convert = _mm256_cvtepi32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, src.as_f64x4())) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11685,9 +12652,11 @@ pub unsafe fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d { - let convert = _mm256_cvtepi32_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) +pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d { + unsafe { + let convert = _mm256_cvtepi32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11697,9 +12666,11 @@ pub unsafe fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { - let convert = _mm_cvtepi32_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, convert, src.as_f64x2())) +pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let convert = _mm_cvtepi32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, src.as_f64x2())) + } } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
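// Illustrative sketch, not part of the patch: the vcvtdq2pd hunks above are
// exact per lane, since every i32 fits in f64's 53-bit significand, so no
// rounding mode is involved. Names are illustrative only.
fn cvtepi32_pd_lane(x: i32) -> f64 {
    x as f64
}

#[test]
fn i32_to_f64_is_exact() {
    assert_eq!(cvtepi32_pd_lane(i32::MAX), 2_147_483_647.0);
    assert_eq!(cvtepi32_pd_lane(i32::MIN), -2_147_483_648.0);
}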
@@ -11709,9 +12680,11 @@ pub unsafe fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d { - let convert = _mm_cvtepi32_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) +pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let convert = _mm_cvtepi32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) + } } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. @@ -11721,9 +12694,11 @@ pub unsafe fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2ps))] -pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 { - let a = a.as_u32x16(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 { + unsafe { + let a = a.as_u32x16(); + transmute::(simd_cast(a)) + } } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11733,9 +12708,11 @@ pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2ps))] -pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 { - let convert = _mm512_cvtepu32_ps(a).as_f32x16(); - transmute(simd_select_bitmask(k, convert, src.as_f32x16())) +pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepu32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, src.as_f32x16())) + } } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11745,9 +12722,11 @@ pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2ps))] -pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 { - let convert = _mm512_cvtepu32_ps(a).as_f32x16(); - transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) +pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + let convert = _mm512_cvtepu32_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. 
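// Illustrative sketch, not part of the patch: the vcvtudq2ps hunks above treat
// each lane as unsigned before converting, which only matters once the top bit
// is set; the same bit pattern converted as signed yields a very different
// float. Names are illustrative only.
fn cvtepu32_ps_lane(x: u32) -> f32 {
    x as f32
}

#[test]
fn unsigned_vs_signed_view_of_the_same_bits() {
    let bits: u32 = 0xFFFF_FFFF;
    assert!(cvtepu32_ps_lane(bits) > 4.0e9); // about 4.29e9 as unsigned
    assert_eq!(bits as i32 as f32, -1.0); // -1.0 when viewed as signed
}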
@@ -11757,9 +12736,11 @@ pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d { - let a = a.as_u32x8(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d { + unsafe { + let a = a.as_u32x8(); + transmute::(simd_cast(a)) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11769,9 +12750,11 @@ pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d { - let convert = _mm512_cvtepu32_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, convert, src.as_f64x8())) +pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d { + unsafe { + let convert = _mm512_cvtepu32_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, convert, src.as_f64x8())) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11781,9 +12764,11 @@ pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d { - let convert = _mm512_cvtepu32_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) +pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d { + unsafe { + let convert = _mm512_cvtepu32_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -11793,9 +12778,11 @@ pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d { - let a = a.as_u32x4(); - transmute::(simd_cast(a)) +pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d { + unsafe { + let a = a.as_u32x4(); + transmute::(simd_cast(a)) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11805,9 +12792,11 @@ pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d { - let convert = _mm256_cvtepu32_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, convert, src.as_f64x4())) +pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d { + unsafe { + let convert = _mm256_cvtepu32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, src.as_f64x4())) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11817,9 +12806,11 @@ pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d { - let convert = _mm256_cvtepu32_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) +pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d { + unsafe { + let convert = _mm256_cvtepu32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -11829,10 +12820,12 @@ pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d { - let a = a.as_u32x4(); - let u64: u32x2 = simd_shuffle!(a, a, [0, 1]); - transmute::(simd_cast(u64)) +pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d { + unsafe { + let a = a.as_u32x4(); + let u64: u32x2 = simd_shuffle!(a, a, [0, 1]); + transmute::(simd_cast(u64)) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11842,9 +12835,11 @@ pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { - let convert = _mm_cvtepu32_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, convert, src.as_f64x2())) +pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let convert = _mm_cvtepu32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, src.as_f64x2())) + } } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
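// Illustrative sketch, not part of the patch: the 128-bit _mm_cvtepu32_pd hunk
// above has room for only two f64 results, so it shuffles out the low two u32
// lanes (`simd_shuffle!(a, a, [0, 1])`) before the cast. The same thing on
// plain arrays; the name is illustrative only.
fn cvtepu32_pd_model(a: [u32; 4]) -> [f64; 2] {
    [a[0] as f64, a[1] as f64]
}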
@@ -11854,9 +12849,11 @@ pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d { - let convert = _mm_cvtepu32_pd(a).as_f64x2(); - transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) +pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d { + unsafe { + let convert = _mm_cvtepu32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) + } } /// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst. @@ -11866,10 +12863,12 @@ pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d { - let v2 = v2.as_i32x16(); - let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute::(simd_cast(v256)) +pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d { + unsafe { + let v2 = v2.as_i32x16(); + let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v256)) + } } /// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11879,9 +12878,11 @@ pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] -pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d { - let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8(); - transmute(simd_select_bitmask(k, convert, src.as_f64x8())) +pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d { + unsafe { + let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8(); + transmute(simd_select_bitmask(k, convert, src.as_f64x8())) + } } /// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst. @@ -11891,10 +12892,12 @@ pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d { - let v2 = v2.as_u32x16(); - let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute::(simd_cast(v256)) +pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d { + unsafe { + let v2 = v2.as_u32x16(); + let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute::(simd_cast(v256)) + } } /// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
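// Illustrative sketch, not part of the patch: the *lo_pd hunks above use only
// the lower eight of the sixteen 32-bit lanes (the shuffle with indices
// [0, 1, 2, 3, 4, 5, 6, 7]), because eight f64 results already fill the
// 512-bit destination. Scalar model of the signed variant; the name is
// illustrative only.
fn cvtepi32lo_pd_model(v2: [i32; 16]) -> [f64; 8] {
    core::array::from_fn(|i| v2[i] as f64)
}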
@@ -11904,9 +12907,11 @@ pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2pd))] -pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d { - let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8(); - transmute(simd_select_bitmask(k, convert, src.as_f64x8())) +pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d { + unsafe { + let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8(); + transmute(simd_select_bitmask(k, convert, src.as_f64x8())) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -11916,9 +12921,11 @@ pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i { - let a = a.as_i32x16(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i { + unsafe { + let a = a.as_i32x16(); + transmute::(simd_cast(a)) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11928,9 +12935,11 @@ pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { - let convert = _mm512_cvtepi32_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, convert, src.as_i16x16())) +pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi32_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, src.as_i16x16())) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11940,9 +12949,11 @@ pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { - let convert = _mm512_cvtepi32_epi16(a).as_i16x16(); - transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) +pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi32_epi16(a).as_i16x16(); + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. 
@@ -11952,9 +12963,11 @@ pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i { - let a = a.as_i32x8(); - transmute::(simd_cast(a)) +pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i { + unsafe { + let a = a.as_i32x8(); + transmute::(simd_cast(a)) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11964,9 +12977,11 @@ pub unsafe fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - let convert = _mm256_cvtepi32_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, src.as_i16x8())) +pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi32_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, src.as_i16x8())) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -11976,9 +12991,11 @@ pub unsafe fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { - let convert = _mm256_cvtepi32_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) +pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi32_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -11988,8 +13005,8 @@ pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i { - transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) +pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
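// Illustrative sketch, not part of the patch: vpmovdw truncates (keeps the low
// 16 bits of each lane) rather than saturating; the saturating conversions are
// the separate vpmovsdw/vpmovusdw intrinsic families. Per lane this is just an
// `as` cast. The name is illustrative only.
fn cvtepi32_epi16_lane(x: i32) -> i16 {
    x as i16 // 0x0001_2345 -> 0x2345, 65_535 -> -1
}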
@@ -11999,8 +13016,8 @@ pub unsafe fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) +pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) } } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12010,8 +13027,8 @@ pub unsafe fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) +pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12021,9 +13038,11 @@ pub unsafe fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i { - let a = a.as_i32x16(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i { + unsafe { + let a = a.as_i32x16(); + transmute::(simd_cast(a)) + } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12033,9 +13052,11 @@ pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { - let convert = _mm512_cvtepi32_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, convert, src.as_i8x16())) +pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { + unsafe { + let convert = _mm512_cvtepi32_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, convert, src.as_i8x16())) + } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -12045,9 +13066,11 @@ pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { - let convert = _mm512_cvtepi32_epi8(a).as_i8x16(); - transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) +pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { + unsafe { + let convert = _mm512_cvtepi32_epi8(a).as_i8x16(); + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) + } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12057,8 +13080,8 @@ pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i { - transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) +pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12068,8 +13091,8 @@ pub unsafe fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) +pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12079,8 +13102,8 @@ pub unsafe fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) +pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12090,8 +13113,8 @@ pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i { - transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) +pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
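// Illustrative sketch, not part of the patch: the 256- and 128-bit forms above
// go through the vpmovdb LLVM shims (with an all-ones mask in the unmasked
// case) rather than `simd_cast`, presumably because their results occupy only
// the low 8 or 4 bytes of the __m128i destination. Assuming the remaining
// bytes come back zeroed, as the Intel pseudocode for the intrinsic describes,
// _mm_cvtepi32_epi8 behaves like this model; the name is illustrative only.
fn cvtepi32_epi8_128_model(a: [i32; 4]) -> [i8; 16] {
    let mut dst = [0i8; 16];
    for i in 0..4 {
        dst[i] = a[i] as i8; // truncation to the low 8 bits
    }
    dst
}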
@@ -12101,8 +13124,8 @@ pub unsafe fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) +pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) } } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12112,8 +13135,8 @@ pub unsafe fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] -pub unsafe fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) +pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst. @@ -12123,9 +13146,11 @@ pub unsafe fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i { - let a = a.as_i64x8(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i { + unsafe { + let a = a.as_i64x8(); + transmute::(simd_cast(a)) + } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12135,9 +13160,11 @@ pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { - let convert = _mm512_cvtepi64_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi64_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) + } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -12147,9 +13174,11 @@ pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { - let convert = _mm512_cvtepi64_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) +pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { + unsafe { + let convert = _mm512_cvtepi64_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) + } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst. @@ -12159,9 +13188,11 @@ pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i { - let a = a.as_i64x4(); - transmute::(simd_cast(a)) +pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i { + unsafe { + let a = a.as_i64x4(); + transmute::(simd_cast(a)) + } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12171,9 +13202,11 @@ pub unsafe fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - let convert = _mm256_cvtepi64_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi64_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) + } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12183,9 +13216,11 @@ pub unsafe fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { - let convert = _mm256_cvtepi64_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) +pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { + let convert = _mm256_cvtepi64_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) + } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst. 
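// Illustrative sketch, not part of the patch: the vpmovqd hunks above keep the
// low 32 bits of each 64-bit lane. Combined with the writemask pattern, the
// masked 256-bit form behaves per lane like this model (name illustrative
// only):
fn mask_cvtepi64_epi32_model(src: [i32; 4], k: u8, a: [i64; 4]) -> [i32; 4] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { a[i] as i32 } else { src[i] })
}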
@@ -12195,8 +13230,8 @@ pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i { - transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) +pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12206,8 +13241,8 @@ pub unsafe fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) +pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) } } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12217,8 +13252,8 @@ pub unsafe fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) +pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -12228,9 +13263,11 @@ pub unsafe fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i { - let a = a.as_i64x8(); - transmute::(simd_cast(a)) +pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i { + unsafe { + let a = a.as_i64x8(); + transmute::(simd_cast(a)) + } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12240,9 +13277,11 @@ pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { - let convert = _mm512_cvtepi64_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, src.as_i16x8())) +pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { + let convert = _mm512_cvtepi64_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, src.as_i16x8())) + } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12252,9 +13291,11 @@ pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { - let convert = _mm512_cvtepi64_epi16(a).as_i16x8(); - transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) +pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { + unsafe { + let convert = _mm512_cvtepi64_epi16(a).as_i16x8(); + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) + } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -12264,8 +13305,8 @@ pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i { - transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) +pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12275,8 +13316,8 @@ pub unsafe fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) +pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -12286,8 +13327,8 @@ pub unsafe fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) +pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -12297,8 +13338,8 @@ pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i { - transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) +pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12308,8 +13349,8 @@ pub unsafe fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) +pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) } } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12319,8 +13360,8 @@ pub unsafe fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) +pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12330,8 +13371,8 @@ pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i { - transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) +pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
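One property these narrowing conversions share: when the converted elements do not fill the 128-bit destination (here, two i16 lanes out of eight), the remaining lanes of the result are zeroed. A small sketch, not part of the patch, under the same assumptions as the earlier one (nightly feature(stdarch_x86_avx512), AVX-512F/VL hardware, post-patch safe signatures):

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    fn narrow_demo() {
        // Element 0 = 0x7_0001, element 1 = -2.
        let a = _mm_set_epi64x(-2, 0x7_0001);
        // Two i64 lanes shrink to two i16 lanes (low 16 bits kept); the other
        // six 16-bit lanes of the result are zeroed.
        let r: [i16; 8] = unsafe { core::mem::transmute(_mm_cvtepi64_epi16(a)) };
        assert_eq!(r, [1, -2, 0, 0, 0, 0, 0, 0]);
    }

    fn main() {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            unsafe { narrow_demo() } // caller has not enabled the features itself
        }
    }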
@@ -12341,8 +13382,8 @@ pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) +pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12352,8 +13393,8 @@ pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) +pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12363,8 +13404,8 @@ pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i { - transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) +pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12374,8 +13415,8 @@ pub unsafe fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) +pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12385,8 +13426,8 @@ pub unsafe fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) +pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. 
@@ -12396,8 +13437,8 @@ pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i { - transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) +pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12407,8 +13448,8 @@ pub unsafe fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) +pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) } } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12418,8 +13459,8 @@ pub unsafe fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] -pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) +pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12429,8 +13470,8 @@ pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i { - transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) +pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i { + unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12440,8 +13481,8 @@ pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { - transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) +pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { + unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. 
@@ -12451,8 +13492,8 @@ pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { - transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) +pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { + unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12462,8 +13503,8 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i { - transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) +pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12473,8 +13514,8 @@ pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) +pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12484,8 +13525,8 @@ pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) +pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12495,8 +13536,8 @@ pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i { - transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) +pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12506,8 +13547,8 @@ pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) +pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) } } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12517,8 +13558,8 @@ pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) +pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12528,8 +13569,8 @@ pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i { - transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) +pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12539,8 +13580,8 @@ pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { - transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) +pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12550,8 +13591,8 @@ pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { - transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) +pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. 
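The _mm*_cvtsepi32_epi16 hunks above switch from truncation to signed saturation: out-of-range values clamp to i16::MIN or i16::MAX rather than having their high bits dropped. A sketch under the same assumptions as before; the plain truncating _mm_cvtepi32_epi16 used for contrast is from the same module, though it does not appear in these hunks:

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    fn sat_demo() {
        // Lanes (low to high): 40_000, -1, i32::MAX, i32::MIN.
        let a = _mm_set_epi32(i32::MIN, i32::MAX, -1, 40_000);
        // Signed saturation clamps each lane into [-32_768, 32_767].
        let s: [i16; 8] = unsafe { core::mem::transmute(_mm_cvtsepi32_epi16(a)) };
        assert_eq!(&s[..4], &[32_767, -1, 32_767, -32_768]);
        // The truncating form keeps only the low 16 bits, so 40_000 (0x9C40)
        // reinterprets as a negative i16.
        let t: [i16; 8] = unsafe { core::mem::transmute(_mm_cvtepi32_epi16(a)) };
        assert_eq!(t[0], -25_536);
    }

    fn main() {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            unsafe { sat_demo() }
        }
    }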
@@ -12561,8 +13602,8 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i { - transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) +pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12572,8 +13613,8 @@ pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) +pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12583,8 +13624,8 @@ pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) +pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12594,8 +13635,8 @@ pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i { - transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) +pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12605,8 +13646,8 @@ pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) +pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) } } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12616,8 +13657,8 @@ pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] -pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) +pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. @@ -12627,8 +13668,8 @@ pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i { - transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) +pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i { + unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12638,8 +13679,8 @@ pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { - transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) +pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12649,8 +13690,8 @@ pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { - transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) +pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. 
@@ -12660,8 +13701,8 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i { - transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) +pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12671,8 +13712,8 @@ pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) +pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12682,8 +13723,8 @@ pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) +pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. @@ -12693,8 +13734,8 @@ pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i { - transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) +pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12704,8 +13745,8 @@ pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) +pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) } } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12715,8 +13756,8 @@ pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) +pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12726,8 +13767,8 @@ pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i { - transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) +pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12737,8 +13778,8 @@ pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) +pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12748,8 +13789,8 @@ pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) +pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. 
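The 64-bit to 32-bit saturating forms make the contrast with the truncating _mm_cvtepi64_epi32 from the earlier hunks concrete: i64::MAX truncates to -1 but saturates to i32::MAX. Another sketch, not part of the patch, under the same nightly/AVX-512F/VL assumptions:

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    fn sat64_demo() {
        // Element 0 = i64::MAX, element 1 = i64::MIN.
        let a = _mm_set_epi64x(i64::MIN, i64::MAX);
        // Saturation clamps each i64 into the i32 range.
        let s: [i32; 4] = unsafe { core::mem::transmute(_mm_cvtsepi64_epi32(a)) };
        assert_eq!((s[0], s[1]), (i32::MAX, i32::MIN));
        // Truncation keeps the low 32 bits: i64::MAX -> -1, i64::MIN -> 0.
        let t: [i32; 4] = unsafe { core::mem::transmute(_mm_cvtepi64_epi32(a)) };
        assert_eq!((t[0], t[1]), (-1, 0));
    }

    fn main() {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            unsafe { sat64_demo() }
        }
    }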
@@ -12759,8 +13800,8 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i { - transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) +pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12770,8 +13811,8 @@ pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) +pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12781,8 +13822,8 @@ pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) +pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12792,8 +13833,8 @@ pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i { - transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) +pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12803,8 +13844,8 @@ pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) +pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) } } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12814,8 +13855,8 @@ pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) +pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12825,8 +13866,8 @@ pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i { - transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) +pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12836,8 +13877,8 @@ pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) +pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12847,8 +13888,8 @@ pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) +pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. 
@@ -12858,8 +13899,8 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i { - transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) +pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12869,8 +13910,8 @@ pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) +pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12880,8 +13921,8 @@ pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) +pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12891,8 +13932,8 @@ pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i { - transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) +pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12902,8 +13943,8 @@ pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) +pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) } } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12913,8 +13954,8 @@ pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] -pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) +pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -12924,8 +13965,8 @@ pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i { - transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) +pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i { + unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12935,8 +13976,8 @@ pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { - transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) +pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -12946,8 +13987,8 @@ pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { - transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) +pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -12957,8 +13998,8 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i { - transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) +pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12968,8 +14009,8 @@ pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) +pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -12979,8 +14020,8 @@ pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) +pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. 
@@ -12990,8 +14031,8 @@ pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i { - transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) +pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13001,8 +14042,8 @@ pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) +pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13012,8 +14053,8 @@ pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) +pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13023,8 +14064,8 @@ pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i { - transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) +pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
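For the unsigned-saturating (_mm*_cvtusepi*) forms above, the source lanes are treated as unsigned, so a lane holding -1 is 0xFFFF_FFFF and clamps to the destination's maximum rather than passing through as -1. A sketch under the same assumptions as the earlier ones:

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    fn usat_demo() {
        // Lanes (low to high): 42, 70_000, -1 (= 0xFFFF_FFFF as u32), 0.
        let a = _mm_set_epi32(0, -1, 70_000, 42);
        let r: [u16; 8] = unsafe { core::mem::transmute(_mm_cvtusepi32_epi16(a)) };
        // Anything above u16::MAX clamps to 0xFFFF; in-range values pass through.
        assert_eq!(&r[..4], &[42, 0xFFFF, 0xFFFF, 0]);
    }

    fn main() {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            unsafe { usat_demo() }
        }
    }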
@@ -13034,8 +14075,8 @@ pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { - transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) +pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13045,8 +14086,8 @@ pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { - transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) +pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13056,8 +14097,8 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i { - transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) +pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13067,8 +14108,8 @@ pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) +pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -13078,8 +14119,8 @@ pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) +pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13089,8 +14130,8 @@ pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i { - transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) +pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13100,8 +14141,8 @@ pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) +pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) } } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13111,8 +14152,8 @@ pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] -pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) +pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. 
@@ -13122,8 +14163,8 @@ pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i { - transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) +pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i { + unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13133,8 +14174,8 @@ pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { - transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) +pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13144,8 +14185,8 @@ pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { - transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) +pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { + unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. @@ -13155,8 +14196,8 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i { - transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) +pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13166,8 +14207,8 @@ pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) +pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13177,8 +14218,8 @@ pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) +pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. @@ -13188,8 +14229,8 @@ pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i { - transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) +pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13199,8 +14240,8 @@ pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) +pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
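The 64-bit to 32-bit unsigned-saturating conversions behave the same way, and the writemask variant composes with them exactly as in the earlier sketches. One more sketch, not part of the patch, under the same assumptions:

    #![feature(stdarch_x86_avx512)]
    use std::arch::x86_64::*;

    #[target_feature(enable = "avx512f,avx512vl")]
    fn usat64_demo() {
        // Element 0 = 5, element 1 = -1, i.e. u64::MAX when viewed unsigned.
        let a = _mm_set_epi64x(-1, 5);
        let r: [u32; 4] = unsafe { core::mem::transmute(_mm_cvtusepi64_epi32(a)) };
        assert_eq!((r[0], r[1]), (5, u32::MAX));
        // Writemask: bit 1 of k is clear, so lane 1 is copied from `src` instead.
        let src = _mm_set1_epi32(9);
        let m: [u32; 4] = unsafe { core::mem::transmute(_mm_mask_cvtusepi64_epi32(src, 0b01, a)) };
        assert_eq!((m[0], m[1]), (5, 9));
    }

    fn main() {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            unsafe { usat64_demo() }
        }
    }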
@@ -13210,8 +14251,8 @@ pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) +pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -13221,8 +14262,8 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i { - transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) +pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13232,8 +14273,8 @@ pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) +pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13243,8 +14284,8 @@ pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) +pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. 
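A usage sketch (not part of the patch itself) for the `vpmovusqd` family above: it assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` at the crate root, since these intrinsics are still gated on the unstable `stdarch_x86_avx512` feature, and the current rules for calling safe `#[target_feature]` functions.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

// Inside a function that itself enables avx512f, the now-safe intrinsic can be
// called without an `unsafe` block: the caller's features cover the callee's
// `#[target_feature(enable = "avx512f")]`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn narrow_u64_to_u32(a: __m512i) -> __m256i {
    _mm512_cvtusepi64_epi32(a)
}

// From code that does not enable avx512f, calling a safe `#[target_feature]`
// function is still unsafe; the obligation moves to the call site, here
// discharged by runtime feature detection.
#[cfg(target_arch = "x86_64")]
fn narrow_checked(a: __m512i) -> Option<__m256i> {
    if is_x86_feature_detected!("avx512f") {
        Some(unsafe { narrow_u64_to_u32(a) })
    } else {
        None
    }
}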
@@ -13254,8 +14295,8 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i { - transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) +pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13265,8 +14306,8 @@ pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) +pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13276,8 +14317,8 @@ pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) +pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -13287,8 +14328,8 @@ pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i { - transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) +pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13298,8 +14339,8 @@ pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) +pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13309,8 +14350,8 @@ pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) +pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13320,8 +14361,8 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i { - transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) +pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13331,8 +14372,8 @@ pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) +pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -13342,8 +14383,8 @@ pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) +pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { + unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13353,8 +14394,8 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i { - transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) +pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13364,8 +14405,8 @@ pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) +pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13375,8 +14416,8 @@ pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) +pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. 
@@ -13386,8 +14427,8 @@ pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i { - transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) +pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13397,8 +14438,8 @@ pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) +pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) } } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -13408,8 +14449,8 @@ pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] -pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) +pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. 
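Another sketch, under the same nightly assumptions, illustrating the writemask/zeromask wording that recurs in the doc comments above, using the 64-to-16-bit saturating forms.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn saturating_narrow_demo(a: __m512i, src: __m128i) -> (__m128i, __m128i) {
    // Convert only the low four u64 lanes; values above u16::MAX saturate to 0xFFFF.
    let k: __mmask8 = 0b0000_1111;
    // Writemask form: lanes whose mask bit is 0 keep the corresponding lane of `src`.
    let merged = _mm512_mask_cvtusepi64_epi16(src, k, a);
    // Zeromask form: lanes whose mask bit is 0 become zero.
    let zeroed = _mm512_maskz_cvtusepi64_epi16(k, a);
    (merged, zeroed)
}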
@@ -13420,18 +14461,20 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512i { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING); - transmute(r) +pub fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13442,23 +14485,25 @@ pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512 /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundps_epi32( +pub fn _mm512_mask_cvt_roundps_epi32( src: __m512i, k: __mmask16, a: __m512, ) -> __m512i { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let src = src.as_i32x16(); - let r = vcvtps2dq(a, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let src = src.as_i32x16(); + let r = vcvtps2dq(a, src, k, ROUNDING); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13469,21 +14514,20 @@ pub unsafe fn _mm512_mask_cvt_roundps_epi32( /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING 
= 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_epi32( - k: __mmask16, - a: __m512, -) -> __m512i { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\ @@ -13494,18 +14538,20 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epi32( /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512i { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING); - transmute(r) +pub fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13516,23 +14562,25 @@ pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512 /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundps_epu32( +pub fn _mm512_mask_cvt_roundps_epu32( src: __m512i, k: __mmask16, a: __m512, ) -> __m512i { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let src = src.as_u32x16(); - let r = vcvtps2udq(a, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let src = src.as_u32x16(); + let r = vcvtps2udq(a, src, k, ROUNDING); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13543,37 +14591,38 @@ pub unsafe fn _mm512_mask_cvt_roundps_epu32( /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * 
[`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_epu32( - k: __mmask16, - a: __m512, -) -> __m512i { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x16(); - let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x16(); + let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f32x8(); - let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE); - transmute(r) +pub fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x8(); + let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13585,16 +14634,14 @@ pub unsafe fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundps_pd( - src: __m512d, - k: __mmask8, - a: __m256, -) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f32x8(); - let src = src.as_f64x8(); - let r = vcvtps2pd(a, src, k, SAE); - transmute(r) +pub fn _mm512_mask_cvt_roundps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x8(); + let src = src.as_f64x8(); + let r = vcvtps2pd(a, src, k, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13606,11 +14653,13 @@ pub unsafe fn _mm512_mask_cvt_roundps_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn 
_mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256) -> __m512d { - static_assert_sae!(SAE); - let a = a.as_f32x8(); - let r = vcvtps2pd(a, f64x8::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256) -> __m512d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x8(); + let r = vcvtps2pd(a, f64x8::ZERO, k, SAE); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\ @@ -13628,11 +14677,13 @@ pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m256i { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13650,16 +14701,18 @@ pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_epi32( +pub fn _mm512_mask_cvt_roundpd_epi32( src: __m256i, k: __mmask8, a: __m512d, ) -> __m256i { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let src = src.as_i32x8(); - let r = vcvtpd2dq(a, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let src = src.as_i32x8(); + let r = vcvtpd2dq(a, src, k, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13677,14 +14730,13 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_epi32( - k: __mmask8, - a: __m512d, -) -> __m256i { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\ @@ -13702,11 +14754,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d) -> __m256i { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn 
_mm512_cvt_roundpd_epu32(a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13724,16 +14778,18 @@ pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d) -> __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_epu32( +pub fn _mm512_mask_cvt_roundpd_epu32( src: __m256i, k: __mmask8, a: __m512d, ) -> __m256i { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let src = src.as_u32x8(); - let r = vcvtpd2udq(a, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let src = src.as_u32x8(); + let r = vcvtpd2udq(a, src, k, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13751,14 +14807,13 @@ pub unsafe fn _mm512_mask_cvt_roundpd_epu32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_epu32( - k: __mmask8, - a: __m512d, -) -> __m256i { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ @@ -13776,11 +14831,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epu32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d) -> __m256 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn _mm512_cvt_roundpd_ps(a: __m512d) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13798,16 +14855,18 @@ pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d) -> __m256 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundpd_ps( +pub fn _mm512_mask_cvt_roundpd_ps( src: __m256, k: __mmask8, a: __m512d, ) -> __m256 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let src = src.as_f32x8(); - let r = vcvtpd2ps(a, 
src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let src = src.as_f32x8(); + let r = vcvtpd2ps(a, src, k, ROUNDING); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13825,11 +14884,13 @@ pub unsafe fn _mm512_mask_cvt_roundpd_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d) -> __m256 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x8(); - let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d) -> __m256 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x8(); + let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING); + transmute(r) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ @@ -13847,11 +14908,13 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_i32x16(); - let r = vcvtdq2ps(a, ROUNDING); - transmute(r) +pub fn _mm512_cvt_roundepi32_ps(a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_i32x16(); + let r = vcvtdq2ps(a, ROUNDING); + transmute(r) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13869,15 +14932,17 @@ pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i) -> __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundepi32_ps( +pub fn _mm512_mask_cvt_roundepi32_ps( src: __m512, k: __mmask16, a: __m512i, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_i32x16(); - let r = vcvtdq2ps(a, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_i32x16(); + let r = vcvtdq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13895,14 +14960,13 @@ pub unsafe fn _mm512_mask_cvt_roundepi32_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundepi32_ps( - k: __mmask16, - a: __m512i, -) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_i32x16(); - let r = vcvtdq2ps(a, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) +pub fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + 
static_assert_rounding!(ROUNDING); + let a = a.as_i32x16(); + let r = vcvtdq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ @@ -13920,11 +14984,13 @@ pub unsafe fn _mm512_maskz_cvt_roundepi32_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_u32x16(); - let r = vcvtudq2ps(a, ROUNDING); - transmute(r) +pub fn _mm512_cvt_roundepu32_ps(a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_u32x16(); + let r = vcvtudq2ps(a, ROUNDING); + transmute(r) + } } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -13942,15 +15008,17 @@ pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i) -> __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundepu32_ps( +pub fn _mm512_mask_cvt_roundepu32_ps( src: __m512, k: __mmask16, a: __m512i, ) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_u32x16(); - let r = vcvtudq2ps(a, ROUNDING); - transmute(simd_select_bitmask(k, r, src.as_f32x16())) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_u32x16(); + let r = vcvtudq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, src.as_f32x16())) + } } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -13968,67 +15036,72 @@ pub unsafe fn _mm512_mask_cvt_roundepu32_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( - k: __mmask16, - a: __m512i, -) -> __m512 { - static_assert_rounding!(ROUNDING); - let a = a.as_u32x16(); - let r = vcvtudq2ps(a, ROUNDING); - transmute(simd_select_bitmask(k, r, f32x16::ZERO)) +pub fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i) -> __m512 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_u32x16(); + let r = vcvtudq2ps(a, ROUNDING); + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
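A sketch for the rounding-controlled conversions above, under the same assumptions. Because of `#[rustc_legacy_const_generics(...)]`, the `ROUNDING` constant is supplied in argument position, mirroring Intel's C signatures.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn round_then_truncate(a: __m512) -> (__m512i, __m512i) {
    // Round to nearest (even) without raising floating-point exceptions.
    let nearest = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    // Truncate toward zero, also suppressing exceptions.
    let toward_zero = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    (nearest, toward_zero)
}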
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundps_ph(a: __m512) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); - transmute(r) +pub fn _mm512_cvt_roundps_ph(a: __m512) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundps_ph( +pub fn _mm512_mask_cvt_roundps_ph( src: __m256i, k: __mmask16, a: __m512, ) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_i16x16(); - let r = vcvtps2ph(a, SAE, src, k); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_i16x16(); + let r = vcvtps2ph(a, SAE, src, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); - transmute(r) +pub fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14038,23 +15111,25 @@ pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m51 /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_cvt_roundps_ph( +pub fn _mm256_mask_cvt_roundps_ph( src: __m128i, k: __mmask8, a: __m256, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let src = src.as_i16x8(); - let r = vcvtps2ph256(a, IMM8, src, k); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let src = src.as_i16x8(); + let r = vcvtps2ph256(a, IMM8, src, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14064,18 +15139,20 @@ pub unsafe fn _mm256_mask_cvt_roundps_ph( /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); - transmute(r) +pub fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256) -> __m128i { + unsafe { + 
static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14085,23 +15162,21 @@ pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m25 /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_cvt_roundps_ph( - src: __m128i, - k: __mmask8, - a: __m128, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let src = src.as_i16x8(); - let r = vcvtps2ph128(a, IMM8, src, k); - transmute(r) +pub fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let src = src.as_i16x8(); + let r = vcvtps2ph128(a, IMM8, src, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14111,71 +15186,75 @@ pub unsafe fn _mm_mask_cvt_roundps_ph( /// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions /// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); - transmute(r) +pub fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvtps_ph(a: __m512) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); - transmute(r) +pub fn _mm512_cvtps_ph(a: __m512) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvtps_ph( - src: __m256i, - k: __mmask16, - a: __m512, -) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_i16x16(); - let r = vcvtps2ph(a, SAE, src, k); - transmute(r) +pub fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_i16x16(); + let r = vcvtps2ph(a, SAE, src, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); - transmute(r) +pub fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14185,23 +15264,21 @@ pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> /// * [`_MM_FROUND_TO_POS_INF`] : round up /// * [`_MM_FROUND_TO_ZERO`] : truncate /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_cvtps_ph( - src: __m128i, - k: __mmask8, - a: __m256, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let src = src.as_i16x8(); - let r = vcvtps2ph256(a, IMM8, src, k); - transmute(r) +pub fn _mm256_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m256) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let src = src.as_i16x8(); + let r = vcvtps2ph256(a, IMM8, src, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14211,18 +15288,20 @@ pub unsafe fn _mm256_mask_cvtps_ph( /// * [`_MM_FROUND_TO_POS_INF`] : round up /// * [`_MM_FROUND_TO_ZERO`] : truncate /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x8(); - let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); - transmute(r) +pub fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x8(); + let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the 
corresponding mask bit is not set).\ @@ -14232,19 +15311,21 @@ pub unsafe fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256) -> /// * [`_MM_FROUND_TO_POS_INF`] : round up /// * [`_MM_FROUND_TO_ZERO`] : truncate /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let src = src.as_i16x8(); - let r = vcvtps2ph128(a, IMM8, src, k); - transmute(r) +pub fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let src = src.as_i16x8(); + let r = vcvtps2ph128(a, IMM8, src, k); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14254,71 +15335,75 @@ pub unsafe fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: _ /// * [`_MM_FROUND_TO_POS_INF`] : round up /// * [`_MM_FROUND_TO_ZERO`] : truncate /// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); - transmute(r) +pub fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); + transmute(r) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
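A sketch for the single-to-half-precision conversions above, under the same assumptions; the 512-bit form takes an SAE constant while the `avx512vl`-sized masked forms take an 8-bit rounding immediate, both passed as trailing arguments.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn pack_f32_to_f16_bits(wide: __m512, narrow: __m256, fallback: __m128i) -> (__m256i, __m128i) {
    // 16 f32 lanes -> 16 f16 lanes (stored as raw 16-bit values), exceptions suppressed.
    let full = _mm512_cvtps_ph(wide, _MM_FROUND_NO_EXC);
    // 8 f32 lanes -> 8 f16 lanes; lanes with a 0 mask bit keep `fallback`.
    let masked = _mm256_mask_cvtps_ph(fallback, 0b0011_1111, narrow, _MM_FROUND_TO_NEAREST_INT);
    (full, masked)
}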
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_i16x16(); - let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE); - transmute(r) +pub fn _mm512_cvt_roundph_ps(a: __m256i) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_i16x16(); + let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvt_roundph_ps( - src: __m512, - k: __mmask16, - a: __m256i, -) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_i16x16(); - let src = src.as_f32x16(); - let r = vcvtph2ps(a, src, k, SAE); - transmute(r) +pub fn _mm512_mask_cvt_roundph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_i16x16(); + let src = src.as_f32x16(); + let r = vcvtph2ps(a, src, k, SAE); + transmute(r) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i) -> __m512 { - static_assert_sae!(SAE); - let a = a.as_i16x16(); - let r = vcvtph2ps(a, f32x16::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i) -> __m512 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_i16x16(); + let r = vcvtph2ps(a, f32x16::ZERO, k, SAE); + transmute(r) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. 
@@ -14328,13 +15413,15 @@ pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m25 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 { - transmute(vcvtph2ps( - a.as_i16x16(), - f32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_NO_EXC, - )) +pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 { + unsafe { + transmute(vcvtph2ps( + a.as_i16x16(), + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_NO_EXC, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14344,13 +15431,15 @@ pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 { - transmute(vcvtph2ps( - a.as_i16x16(), - src.as_f32x16(), - k, - _MM_FROUND_NO_EXC, - )) +pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 { + unsafe { + transmute(vcvtph2ps( + a.as_i16x16(), + src.as_f32x16(), + k, + _MM_FROUND_NO_EXC, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14360,8 +15449,8 @@ pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 { - transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) +pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 { + unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14371,9 +15460,11 @@ pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 { - let convert = _mm256_cvtph_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8())) +pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 { + unsafe { + let convert = _mm256_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8())) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -14383,9 +15474,11 @@ pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 { - let convert = _mm256_cvtph_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 { + unsafe { + let convert = _mm256_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14395,9 +15488,11 @@ pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { - let convert = _mm_cvtph_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) +pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14407,25 +15502,29 @@ pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] -pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { - let convert = _mm_cvtph_ps(a); - transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { + unsafe { + let convert = _mm_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
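The writemask and zeromask forms above differ only in what happens to unselected lanes. A short sketch of both on the 128-bit widening conversion; `widen_f16` is an illustrative name, same nightly-feature assumption as the earlier sketch.

use core::arch::x86_64::*;

// Illustrative helper. `a` holds packed half-precision bit patterns: with the
// writemask, unselected lanes are copied from `src`; with the zeromask they
// are zeroed.
#[target_feature(enable = "avx512f,avx512vl")]
fn widen_f16(src: __m128, k: __mmask8, a: __m128i) -> (__m128, __m128) {
    (_mm_mask_cvtph_ps(src, k, a), _mm_maskz_cvtph_ps(k, a))
}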
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE); - transmute(r) +pub fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14437,16 +15536,18 @@ pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvtt_roundps_epi32( +pub fn _mm512_mask_cvtt_roundps_epi32( src: __m512i, k: __mmask16, a: __m512, ) -> __m512i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_i32x16(); - let r = vcvttps2dq(a, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_i32x16(); + let r = vcvttps2dq(a, src, k, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14458,27 +15559,31 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvttps2dq(a, i32x16::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2dq(a, i32x16::ZERO, k, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922) +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE); - transmute(r) +pub fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14490,16 +15595,18 @@ pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvtt_roundps_epu32( +pub fn _mm512_mask_cvtt_roundps_epu32( src: __m512i, k: __mmask16, a: __m512, ) -> __m512i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let src = src.as_u32x16(); - let r = vcvttps2udq(a, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let src = src.as_u32x16(); + let r = vcvttps2udq(a, src, k, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14511,27 +15618,31 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { - static_assert_sae!(SAE); - let a = a.as_f32x16(); - let r = vcvttps2udq(a, u32x16::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x16(); + let r = vcvttps2udq(a, u32x16::ZERO, k, SAE); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
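Since the SAE argument is a const generic, exception suppression is written with a turbofish at the call site. A sketch of the truncating conversions just shown; `truncate_no_exc` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper: truncate f32 lanes to signed and unsigned 32-bit
// integers while suppressing floating-point exceptions via _MM_FROUND_NO_EXC.
#[target_feature(enable = "avx512f")]
fn truncate_no_exc(a: __m512) -> (__m512i, __m512i) {
    let signed = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
    let unsigned = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0xFFFF, a);
    (signed, unsigned)
}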
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE); - transmute(r) +pub fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14543,16 +15654,18 @@ pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( +pub fn _mm512_mask_cvtt_roundpd_epi32( src: __m256i, k: __mmask8, a: __m512d, ) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let src = src.as_i32x8(); - let r = vcvttpd2dq(a, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_i32x8(); + let r = vcvttpd2dq(a, src, k, SAE); + transmute(r) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14564,27 +15677,31 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE); - transmute(r) +pub fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ @@ -14596,32 +15713,36 @@ pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_cvtt_roundpd_epu32( +pub fn _mm512_mask_cvtt_roundpd_epu32( src: __m256i, k: __mmask8, a: __m512d, ) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let src = src.as_i32x8(); - let r = vcvttpd2udq(a, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let src = src.as_i32x8(); + let r = vcvttpd2udq(a, src, k, SAE); + transmute(r) + } } -/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst. +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i { - transmute(vcvttps2dq( - a.as_f32x16(), - i32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2dq( + a.as_f32x16(), + i32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
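The double-precision sources narrow eight f64 lanes into eight i32 lanes, so the result is only 256 bits wide. A one-line sketch; `doubles_to_i32` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper: __m512d in, __m256i out; each f64 lane truncates to one i32 lane.
#[target_feature(enable = "avx512f")]
fn doubles_to_i32(a: __m512d) -> __m256i {
    _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a)
}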
@@ -14631,13 +15752,15 @@ pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { - transmute(vcvttps2dq( - a.as_f32x16(), - src.as_i32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2dq( + a.as_f32x16(), + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14647,13 +15770,15 @@ pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { - transmute(vcvttps2dq( - a.as_f32x16(), - i32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2dq( + a.as_f32x16(), + i32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14663,8 +15788,8 @@ pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { - transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) +pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14674,8 +15799,8 @@ pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i { - transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) +pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -14685,8 +15810,8 @@ pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) +pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14696,24 +15821,26 @@ pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] -pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) +pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) } } -/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i { - transmute(vcvttps2udq( - a.as_f32x16(), - u32x16::ZERO, - 0b11111111_11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2udq( + a.as_f32x16(), + u32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14723,13 +15850,15 @@ pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { - transmute(vcvttps2udq( - a.as_f32x16(), - src.as_u32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2udq( + a.as_f32x16(), + src.as_u32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
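A sketch of the non-rounding `cvttps_epi32` forms, building an input with `_mm512_set4_ps` from later in this file; truncation always rounds toward zero. `truncate_masked` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper. Truncation rounds toward zero: 1.9 -> 1 and -2.7 -> -2.
// Lanes whose mask bit is clear are copied from `src` (mask form) or zeroed
// (maskz form).
#[target_feature(enable = "avx512f")]
fn truncate_masked(src: __m512i, k: __mmask16) -> (__m512i, __m512i) {
    let a = _mm512_set4_ps(1.9, -2.7, 0.5, 42.0);
    (
        _mm512_mask_cvttps_epi32(src, k, a),
        _mm512_maskz_cvttps_epi32(k, a),
    )
}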
@@ -14739,24 +15868,26 @@ pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i { - transmute(vcvttps2udq( - a.as_f32x16(), - u32x16::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i { + unsafe { + transmute(vcvttps2udq( + a.as_f32x16(), + u32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } -/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i { - transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) +pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i { + unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) } } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14766,8 +15897,8 @@ pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { - transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) +pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) } } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14777,19 +15908,19 @@ pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i { - transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) +pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i { + unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) } } -/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. 
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i { - transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) +pub fn _mm_cvttps_epu32(a: __m128) -> __m128i { + unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) } } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14799,8 +15930,8 @@ pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) +pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) } } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14810,8 +15941,8 @@ pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] -pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) +pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { + unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -14823,27 +15954,31 @@ pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { - static_assert_sae!(SAE); - let a = a.as_f64x8(); - let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE); - transmute(r) +pub fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x8(); + let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE); + transmute(r) + } } -/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst. +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst. 
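The unsigned variants follow the same pattern; the 128-bit and 256-bit forms additionally require AVX-512VL. A short sketch; `truncate_unsigned` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper. Inputs should lie in the u32 range to get meaningful
// results; masked-off lanes of the result come from `src`.
#[target_feature(enable = "avx512f,avx512vl")]
fn truncate_unsigned(src: __m128i, k: __mmask8, a: __m128) -> (__m128i, __m128i) {
    (_mm_cvttps_epu32(a), _mm_mask_cvttps_epu32(src, k, a))
}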
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i { - transmute(vcvttpd2dq( - a.as_f64x8(), - i32x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2dq( + a.as_f64x8(), + i32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14853,13 +15988,15 @@ pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvttpd2dq( - a.as_f64x8(), - src.as_i32x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2dq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14869,13 +16006,15 @@ pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvttpd2dq( - a.as_f64x8(), - i32x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2dq( + a.as_f64x8(), + i32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14885,8 +16024,8 @@ pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) +pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -14896,8 +16035,8 @@ pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) +pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14907,8 +16046,8 @@ pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) +pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14918,24 +16057,26 @@ pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] -pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) +pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) } } -/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i { - transmute(vcvttpd2udq( - a.as_f64x8(), - i32x8::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2udq( + a.as_f64x8(), + i32x8::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
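Same idea for the packed-double sources: four f64 lanes fill the four i32 lanes of a `__m128i`. `narrow_doubles` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper: masked-off lanes of the 128-bit result are taken from `src`.
#[target_feature(enable = "avx512f,avx512vl")]
fn narrow_doubles(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    _mm256_mask_cvttpd_epi32(src, k, a)
}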
@@ -14945,13 +16086,15 @@ pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvttpd2udq( - a.as_f64x8(), - src.as_i32x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2udq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -14961,24 +16104,26 @@ pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i { - transmute(vcvttpd2udq( - a.as_f64x8(), - i32x8::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + unsafe { + transmute(vcvttpd2udq( + a.as_f64x8(), + i32x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } -/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i { - transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) +pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14988,8 +16133,8 @@ pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) +pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -14999,19 +16144,19 @@ pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) +pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i { + unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) } } -/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i { - transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) +pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -15021,8 +16166,8 @@ pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) +pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -15032,8 +16177,8 @@ pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] -pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) +pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i { + unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) } } /// Returns vector of type `__m512d` with all elements set to zero. @@ -15043,9 +16188,9 @@ pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_setzero_pd() -> __m512d { +pub fn _mm512_setzero_pd() -> __m512d { // All-0 is a properly initialized __m512d - const { mem::zeroed() } + unsafe { const { mem::zeroed() } } } /// Returns vector of type `__m512` with all elements set to zero. 
@@ -15055,9 +16200,9 @@ pub unsafe fn _mm512_setzero_pd() -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_setzero_ps() -> __m512 { +pub fn _mm512_setzero_ps() -> __m512 { // All-0 is a properly initialized __m512 - const { mem::zeroed() } + unsafe { const { mem::zeroed() } } } /// Return vector of type `__m512` with all elements set to zero. @@ -15067,9 +16212,9 @@ pub unsafe fn _mm512_setzero_ps() -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_setzero() -> __m512 { +pub fn _mm512_setzero() -> __m512 { // All-0 is a properly initialized __m512 - const { mem::zeroed() } + unsafe { const { mem::zeroed() } } } /// Returns vector of type `__m512i` with all elements set to zero. @@ -15079,9 +16224,9 @@ pub unsafe fn _mm512_setzero() -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_setzero_si512() -> __m512i { +pub fn _mm512_setzero_si512() -> __m512i { // All-0 is a properly initialized __m512i - const { mem::zeroed() } + unsafe { const { mem::zeroed() } } } /// Return vector of type `__m512i` with all elements set to zero. @@ -15091,9 +16236,9 @@ pub unsafe fn _mm512_setzero_si512() -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_setzero_epi32() -> __m512i { +pub fn _mm512_setzero_epi32() -> __m512i { // All-0 is a properly initialized __m512i - const { mem::zeroed() } + unsafe { const { mem::zeroed() } } } /// Sets packed 32-bit integers in `dst` with the supplied values in reverse @@ -15103,7 +16248,7 @@ pub unsafe fn _mm512_setzero_epi32() -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr_epi32( +pub fn _mm512_setr_epi32( e15: i32, e14: i32, e13: i32, @@ -15121,10 +16266,12 @@ pub unsafe fn _mm512_setr_epi32( e1: i32, e0: i32, ) -> __m512i { - let r = i32x16::new( - e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, - ); - transmute(r) + unsafe { + let r = i32x16::new( + e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, + ); + transmute(r) + } } /// Set packed 8-bit integers in dst with the supplied values. 
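As the bodies above show, the `setzero` family is just a `const`-evaluated all-zero bit pattern; the aliases differ only in the nominal element type. A sketch; `zeros` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper: all three values are the same 512-bit zero, typed as
// float, double and integer vectors respectively.
#[target_feature(enable = "avx512f")]
fn zeros() -> (__m512, __m512d, __m512i) {
    (_mm512_setzero_ps(), _mm512_setzero_pd(), _mm512_setzero_si512())
}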
@@ -15133,7 +16280,7 @@ pub unsafe fn _mm512_setr_epi32( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set_epi8( +pub fn _mm512_set_epi8( e63: i8, e62: i8, e61: i8, @@ -15199,13 +16346,15 @@ pub unsafe fn _mm512_set_epi8( e1: i8, e0: i8, ) -> __m512i { - let r = i8x64::new( - e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, - e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37, - e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55, - e56, e57, e58, e59, e60, e61, e62, e63, - ); - transmute(r) + unsafe { + let r = i8x64::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, + e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, + e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, + e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63, + ); + transmute(r) + } } /// Set packed 16-bit integers in dst with the supplied values. @@ -15214,7 +16363,7 @@ pub unsafe fn _mm512_set_epi8( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set_epi16( +pub fn _mm512_set_epi16( e31: i16, e30: i16, e29: i16, @@ -15248,11 +16397,13 @@ pub unsafe fn _mm512_set_epi16( e1: i16, e0: i16, ) -> __m512i { - let r = i16x32::new( - e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, - e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, - ); - transmute(r) + unsafe { + let r = i16x32::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, + e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, + ); + transmute(r) + } } /// Set packed 32-bit integers in dst with the repeated 4 element sequence. 
@@ -15261,7 +16412,7 @@ pub unsafe fn _mm512_set_epi16( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { +pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a) } @@ -15271,7 +16422,7 @@ pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { +pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a) } @@ -15281,7 +16432,7 @@ pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { +pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { _mm512_set_pd(d, c, b, a, d, c, b, a) } @@ -15291,7 +16442,7 @@ pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { +pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d) } @@ -15301,7 +16452,7 @@ pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { +pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d) } @@ -15311,7 +16462,7 @@ pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { +pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { _mm512_set_pd(a, b, c, d, a, b, c, d) } @@ -15321,7 +16472,7 @@ pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set_epi64( +pub fn _mm512_set_epi64( e0: i64, e1: i64, e2: i64, @@ -15340,7 +16491,7 @@ pub unsafe fn _mm512_set_epi64( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr_epi64( +pub fn _mm512_setr_epi64( e0: i64, e1: i64, e2: i64, @@ -15350,8 +16501,10 @@ pub unsafe fn _mm512_setr_epi64( e6: i64, e7: i64, ) -> __m512i { - let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); - transmute(r) + unsafe { + let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); + transmute(r) + } } /// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8. 
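The `set4`/`setr4` pairs differ only in lane order: `set4` places its last argument in lane 0 and `setr4` keeps the order as written, while `setr_epi64` stores its first argument in lane 0. A sketch to make the orderings concrete; `repeated_patterns` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper showing the resulting lane layouts.
#[target_feature(enable = "avx512f")]
fn repeated_patterns() -> (__m512i, __m512i, __m512i) {
    let ascending = _mm512_set4_epi32(3, 2, 1, 0); // lanes: 0,1,2,3, 0,1,2,3, ...
    let descending = _mm512_setr4_epi32(3, 2, 1, 0); // lanes: 3,2,1,0, 3,2,1,0, ...
    let counted = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); // lane i holds i
    (ascending, descending, counted)
}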
@@ -17188,8 +18341,8 @@ pub unsafe fn _mm_mmask_i64gather_ps( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) +pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) } } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17199,8 +18352,8 @@ pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i { - transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) +pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) } } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17210,8 +18363,8 @@ pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) +pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) } } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17221,8 +18374,8 @@ pub unsafe fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) +pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) } } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17232,8 +18385,8 @@ pub unsafe fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) +pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) } } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
@@ -17243,8 +18396,8 @@ pub unsafe fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) +pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) } } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17254,8 +18407,8 @@ pub unsafe fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) +pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) } } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17265,8 +18418,8 @@ pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i { - transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) +pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) } } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17276,8 +18429,8 @@ pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) +pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) } } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17287,8 +18440,8 @@ pub unsafe fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) +pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) } } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
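Compression is easiest to see with a concrete mask: the selected lanes are packed contiguously into the low elements of the result. A sketch using `_mm512_setr_epi32` from earlier in this file; `pack_odd_lanes` is an illustrative name.

use core::arch::x86_64::*;

// Illustrative helper. With k = 0b1010_1010 (lanes 1, 3, 5 and 7 selected),
// the values 1, 3, 5, 7 land in lanes 0..=3 and the remaining lanes are zeroed.
#[target_feature(enable = "avx512f")]
fn pack_odd_lanes() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    _mm512_maskz_compress_epi32(0b1010_1010, a)
}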
@@ -17298,8 +18451,8 @@ pub unsafe fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) +pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) } } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17309,8 +18462,8 @@ pub unsafe fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) +pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) } } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17320,8 +18473,8 @@ pub unsafe fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) +pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) } } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17331,8 +18484,8 @@ pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) +pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) } } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
@@ -17342,8 +18495,8 @@ pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) +pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) } } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17353,8 +18506,8 @@ pub unsafe fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) +pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) } } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17364,8 +18517,8 @@ pub unsafe fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) } } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17375,8 +18528,8 @@ pub unsafe fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) } } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
@@ -17386,8 +18539,8 @@ pub unsafe fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) +pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) } } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17397,8 +18550,8 @@ pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) +pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) } } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17408,8 +18561,8 @@ pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) +pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) } } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17419,8 +18572,8 @@ pub unsafe fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) +pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) } } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
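For readers unfamiliar with the compress semantics described in the doc comments above, a minimal scalar model (illustrative only; these helpers are not part of the crate): active lanes of `a` are packed towards element 0, and the tail either keeps `src` (mask variant) or is zeroed (maskz variant).

```rust
fn mask_compress_epi32_model(src: [i32; 16], k: u16, a: [i32; 16]) -> [i32; 16] {
    let mut dst = src;
    let mut next = 0;
    for i in 0..16 {
        if (k >> i) & 1 == 1 {
            // Active lane: pack it into the next free slot at the front.
            dst[next] = a[i];
            next += 1;
        }
    }
    dst
}

fn maskz_compress_epi32_model(k: u16, a: [i32; 16]) -> [i32; 16] {
    // The maskz variant is the mask variant with an all-zero `src`.
    mask_compress_epi32_model([0; 16], k, a)
}
```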
@@ -17430,8 +18583,8 @@ pub unsafe fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) } } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -17441,8 +18594,8 @@ pub unsafe fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) } } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -17584,8 +18737,8 @@ pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] -pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) +pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) } } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17595,8 +18748,8 @@ pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] -pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i { - transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) +pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) } } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -17606,8 +18759,8 @@ pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] -pub unsafe fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) +pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) } } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17617,8 +18770,8 @@ pub unsafe fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] -pub unsafe fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) +pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) } } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17628,8 +18781,8 @@ pub unsafe fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] -pub unsafe fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) +pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) } } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17639,8 +18792,8 @@ pub unsafe fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] -pub unsafe fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) +pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) } } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -17650,8 +18803,8 @@ pub unsafe fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] -pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) +pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) } } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17661,8 +18814,8 @@ pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] -pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i { - transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) +pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) } } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17672,8 +18825,8 @@ pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] -pub unsafe fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) +pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) } } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17683,8 +18836,8 @@ pub unsafe fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] -pub unsafe fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) +pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) } } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -17694,8 +18847,8 @@ pub unsafe fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] -pub unsafe fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) +pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) } } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17705,8 +18858,8 @@ pub unsafe fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] -pub unsafe fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) +pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) } } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17716,8 +18869,8 @@ pub unsafe fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] -pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) +pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) } } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17727,8 +18880,8 @@ pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] -pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) +pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) } } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
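The expand wrappers are the inverse operation: consecutive elements of `a`, starting at index 0, are distributed into the active lanes, while inactive lanes keep `src` (mask variant) or become zero (maskz variant). A minimal scalar model, again purely illustrative:

```rust
fn mask_expand_epi32_model(src: [i32; 16], k: u16, a: [i32; 16]) -> [i32; 16] {
    let mut dst = src;
    let mut next = 0; // next contiguous source element of `a`
    for i in 0..16 {
        if (k >> i) & 1 == 1 {
            // Active lane: take the next contiguous element of `a`.
            dst[i] = a[next];
            next += 1;
        }
    }
    dst
}
```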
@@ -17738,8 +18891,8 @@ pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] -pub unsafe fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) +pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) } } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17749,8 +18902,8 @@ pub unsafe fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] -pub unsafe fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) +pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) } } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17760,8 +18913,8 @@ pub unsafe fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] -pub unsafe fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) } } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17771,8 +18924,8 @@ pub unsafe fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] -pub unsafe fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) } } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -17782,8 +18935,8 @@ pub unsafe fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] -pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) +pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) } } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17793,8 +18946,8 @@ pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] -pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) +pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) } } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17804,8 +18957,8 @@ pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] -pub unsafe fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) +pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) } } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17815,8 +18968,8 @@ pub unsafe fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] -pub unsafe fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) +pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) } } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -17826,8 +18979,8 @@ pub unsafe fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] -pub unsafe fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) } } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17837,8 +18990,8 @@ pub unsafe fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] -pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -17849,11 +19002,13 @@ pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_rol_epi32(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprold(a, IMM8); - transmute(r) +pub fn _mm512_rol_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprold(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17864,15 +19019,13 @@ pub unsafe fn _mm512_rol_epi32(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_rol_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprold(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprold(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -17883,11 +19036,13 @@ pub unsafe fn _mm512_mask_rol_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprold(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprold(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -17898,11 +19053,13 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_rol_epi32(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprold256(a, IMM8); - transmute(r) +pub fn _mm256_rol_epi32(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprold256(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17913,15 +19070,13 @@ pub unsafe fn _mm256_rol_epi32(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_rol_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprold256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprold256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17932,11 +19087,13 @@ pub unsafe fn _mm256_mask_rol_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprold256(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprold256(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
@@ -17947,11 +19104,13 @@ pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_rol_epi32(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprold128(a, IMM8); - transmute(r) +pub fn _mm_rol_epi32(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprold128(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -17962,15 +19121,13 @@ pub unsafe fn _mm_rol_epi32(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_rol_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprold128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprold128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -17981,11 +19138,13 @@ pub unsafe fn _mm_mask_rol_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprold128(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprold128(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -17996,11 +19155,13 @@ pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_ror_epi32(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprord(a, IMM8); - transmute(r) +pub fn _mm512_ror_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprord(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
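For the rotate wrappers, `IMM8` is a const generic immediate (exposed through `#[rustc_legacy_const_generics]`), so the count is fixed at compile time and validated by `static_assert_uimm_bits!`. A hypothetical caller on a nightly toolchain with `#![feature(stdarch_x86_avx512)]` might look like this:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn rol3_epi32(a: core::arch::x86_64::__m512i) -> core::arch::x86_64::__m512i {
    use core::arch::x86_64::_mm512_rol_epi32;
    // The rotation count is supplied as a const generic argument.
    _mm512_rol_epi32::<3>(a)
}
```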
@@ -18011,15 +19172,13 @@ pub unsafe fn _mm512_ror_epi32(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_ror_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprord(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprord(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18030,11 +19189,13 @@ pub unsafe fn _mm512_mask_ror_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let r = vprord(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let r = vprord(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -18045,11 +19206,13 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_ror_epi32(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprord256(a, IMM8); - transmute(r) +pub fn _mm256_ror_epi32(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprord256(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18060,15 +19223,13 @@ pub unsafe fn _mm256_ror_epi32(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_ror_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprord256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprord256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18079,11 +19240,13 @@ pub unsafe fn _mm256_mask_ror_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let r = vprord256(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let r = vprord256(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -18094,11 +19257,13 @@ pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_ror_epi32(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprord128(a, IMM8); - transmute(r) +pub fn _mm_ror_epi32(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprord128(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18109,15 +19274,13 @@ pub unsafe fn _mm_ror_epi32(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_ror_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprord128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprord128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18128,11 +19291,13 @@ pub unsafe fn _mm_mask_ror_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let r = vprord128(a, IMM8); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let r = vprord128(a, IMM8); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. 
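Semantically, the 32-bit rotate wrappers match Rust's scalar rotations applied lane-wise; the hardware effectively takes the count modulo the lane width. A sketch of that model (illustrative only):

```rust
fn rol_epi32_model(a: [u32; 16], imm8: u32) -> [u32; 16] {
    // rotate_left already wraps the count; the explicit `% 32` just mirrors
    // the hardware's modulo-lane-width behaviour.
    a.map(|x| x.rotate_left(imm8 % 32))
}

fn ror_epi32_model(a: [u32; 16], imm8: u32) -> [u32; 16] {
    a.map(|x| x.rotate_right(imm8 % 32))
}
```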
@@ -18143,11 +19308,13 @@ pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_rol_epi64(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprolq(a, IMM8); - transmute(r) +pub fn _mm512_rol_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprolq(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18158,15 +19325,13 @@ pub unsafe fn _mm512_rol_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_rol_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprolq(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x8())) +pub fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprolq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18177,11 +19342,13 @@ pub unsafe fn _mm512_mask_rol_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprolq(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x8::ZERO)) +pub fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprolq(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18192,11 +19359,13 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_rol_epi64(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprolq256(a, IMM8); - transmute(r) +pub fn _mm256_rol_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprolq256(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -18207,15 +19376,13 @@ pub unsafe fn _mm256_rol_epi64(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_rol_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprolq256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x4())) +pub fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprolq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18226,11 +19393,13 @@ pub unsafe fn _mm256_mask_rol_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprolq256(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x4::ZERO)) +pub fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprolq256(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18241,11 +19410,13 @@ pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_rol_epi64(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprolq128(a, IMM8); - transmute(r) +pub fn _mm_rol_epi64(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprolq128(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18256,15 +19427,13 @@ pub unsafe fn _mm_rol_epi64(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_rol_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprolq128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x2())) +pub fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprolq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18275,11 +19444,13 @@ pub unsafe fn _mm_mask_rol_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprolq128(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x2::ZERO)) +pub fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprolq128(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -18290,11 +19461,13 @@ pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_ror_epi64(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprorq(a, IMM8); - transmute(r) +pub fn _mm512_ror_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprorq(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18305,15 +19478,13 @@ pub unsafe fn _mm512_ror_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_ror_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprorq(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x8())) +pub fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprorq(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18324,11 +19495,13 @@ pub unsafe fn _mm512_mask_ror_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x8(); - let r = vprorq(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x8::ZERO)) +pub fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x8(); + let r = vprorq(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
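Note that the right-rotate wrappers assert `vprolq`/`vprold` in their tests rather than `vprorq`/`vprord`; this is presumably because the backend is free to lower a right rotate into a left rotate by the complementary count. The underlying identity, checked on a scalar 64-bit lane:

```rust
fn ror_equals_complementary_rol(x: u64, n: u32) -> bool {
    // Rotating right by n is the same as rotating left by (64 - n) mod 64.
    x.rotate_right(n % 64) == x.rotate_left((64 - n % 64) % 64)
}
```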
@@ -18339,11 +19512,13 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_ror_epi64(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprorq256(a, IMM8); - transmute(r) +pub fn _mm256_ror_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprorq256(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18354,15 +19529,13 @@ pub unsafe fn _mm256_ror_epi64(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_ror_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprorq256(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x4())) +pub fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprorq256(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18373,11 +19546,13 @@ pub unsafe fn _mm256_mask_ror_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x4(); - let r = vprorq256(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x4::ZERO)) +pub fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x4(); + let r = vprorq256(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -18388,11 +19563,13 @@ pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_ror_epi64(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprorq128(a, IMM8); - transmute(r) +pub fn _mm_ror_epi64(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprorq128(a, IMM8); + transmute(r) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -18403,15 +19580,13 @@ pub unsafe fn _mm_ror_epi64(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_ror_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprorq128(a, IMM8); - transmute(simd_select_bitmask(k, r, src.as_i64x2())) +pub fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprorq128(a, IMM8); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18422,11 +19597,13 @@ pub unsafe fn _mm_mask_ror_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i64x2(); - let r = vprorq128(a, IMM8); - transmute(simd_select_bitmask(k, r, i64x2::ZERO)) +pub fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i64x2(); + let r = vprorq128(a, IMM8); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. @@ -18437,12 +19614,14 @@ pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_slli_epi32(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm512_setzero_si512() - } else { - transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8))) +pub fn _mm512_slli_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8))) + } } } @@ -18454,18 +19633,16 @@ pub unsafe fn _mm512_slli_epi32(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_slli_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 32 { - u32x16::ZERO - } else { - simd_shl(a.as_u32x16(), u32x16::splat(IMM8)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u32x16())) +pub fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 32 { + u32x16::ZERO + } else { + simd_shl(a.as_u32x16(), u32x16::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u32x16())) + } } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18476,13 +19653,15 @@ pub unsafe fn _mm512_mask_slli_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm512_setzero_si512() - } else { - let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8)); - transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) +pub fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8)); + transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) + } } } @@ -18494,18 +19673,16 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_slli_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 32 { - u32x8::ZERO - } else { - simd_shl(a.as_u32x8(), u32x8::splat(IMM8)) - }; - transmute(simd_select_bitmask(k, r, src.as_u32x8())) +pub fn _mm256_mask_slli_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x8::ZERO + } else { + simd_shl(a.as_u32x8(), u32x8::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x8())) + } } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18516,13 +19693,15 @@ pub unsafe fn _mm256_mask_slli_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm256_setzero_si256() - } else { - let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8)); - transmute(simd_select_bitmask(k, r, u32x8::ZERO)) +pub fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x8::ZERO)) + } } } @@ -18534,18 +19713,16 @@ pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_slli_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 32 { - u32x4::ZERO - } else { - simd_shl(a.as_u32x4(), u32x4::splat(IMM8)) - }; - transmute(simd_select_bitmask(k, r, src.as_u32x4())) +pub fn _mm_mask_slli_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x4::ZERO + } else { + simd_shl(a.as_u32x4(), u32x4::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x4())) + } } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18556,13 +19733,15 @@ pub unsafe fn _mm_mask_slli_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm_setzero_si128() - } else { - let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8)); - transmute(simd_select_bitmask(k, r, u32x4::ZERO)) +pub fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm_setzero_si128() + } else { + let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x4::ZERO)) + } } } @@ -18574,12 +19753,14 @@ pub unsafe fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_srli_epi32(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm512_setzero_si512() - } else { - transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8))) +pub fn _mm512_srli_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8))) + } } } @@ -18591,18 +19772,16 @@ pub unsafe fn _mm512_srli_epi32(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_srli_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, -) -> __m512i { - 
static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 32 { - u32x16::ZERO - } else { - simd_shr(a.as_u32x16(), u32x16::splat(IMM8)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u32x16())) +pub fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 32 { + u32x16::ZERO + } else { + simd_shr(a.as_u32x16(), u32x16::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u32x16())) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18613,13 +19792,15 @@ pub unsafe fn _mm512_mask_srli_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm512_setzero_si512() - } else { - let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8)); - transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) +pub fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm512_setzero_si512() + } else { + let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8)); + transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) + } } } @@ -18631,18 +19812,16 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_srli_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 32 { - u32x8::ZERO - } else { - simd_shr(a.as_u32x8(), u32x8::splat(IMM8)) - }; - transmute(simd_select_bitmask(k, r, src.as_u32x8())) +pub fn _mm256_mask_srli_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x8::ZERO + } else { + simd_shr(a.as_u32x8(), u32x8::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x8())) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
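(Illustrative sketch, not part of the patch: the `if IMM8 >= 32` branches above encode a behaviour that differs from plain Rust shifts — an out-of-range immediate neither panics nor wraps, it simply produces zero lanes. Helper names below are hypothetical.)

fn slli_epi32_lane(x: u32, imm8: u32) -> u32 {
    // Counts of 32 or more zero the lane, mirroring the `_mm512_setzero_si512()` branch above.
    if imm8 >= 32 { 0 } else { x << imm8 }
}

fn srli_epi32_lane(x: u32, imm8: u32) -> u32 {
    if imm8 >= 32 { 0 } else { x >> imm8 }
}

// slli_epi32_lane(1, 35) == 0, whereas `1u32 << 35` would panic in a debug build.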
@@ -18653,13 +19832,15 @@ pub unsafe fn _mm256_mask_srli_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm256_setzero_si256() - } else { - let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8)); - transmute(simd_select_bitmask(k, r, u32x8::ZERO)) +pub fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm256_setzero_si256() + } else { + let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x8::ZERO)) + } } } @@ -18671,18 +19852,16 @@ pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_srli_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 32 { - u32x4::ZERO - } else { - simd_shr(a.as_u32x4(), u32x4::splat(IMM8)) - }; - transmute(simd_select_bitmask(k, r, src.as_u32x4())) +pub fn _mm_mask_srli_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 32 { + u32x4::ZERO + } else { + simd_shr(a.as_u32x4(), u32x4::splat(IMM8)) + }; + transmute(simd_select_bitmask(k, r, src.as_u32x4())) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18693,13 +19872,15 @@ pub unsafe fn _mm_mask_srli_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm_setzero_si128() - } else { - let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8)); - transmute(simd_select_bitmask(k, r, u32x4::ZERO)) +pub fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 32 { + _mm_setzero_si128() + } else { + let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8)); + transmute(simd_select_bitmask(k, r, u32x4::ZERO)) + } } } @@ -18711,12 +19892,14 @@ pub unsafe fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_slli_epi64(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm512_setzero_si512() - } else { - transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))) +pub fn _mm512_slli_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))) + } } } @@ -18728,18 +19911,16 @@ pub unsafe fn _mm512_slli_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_slli_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, -) -> __m512i { - 
static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 64 { - u64x8::ZERO - } else { - simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u64x8())) +pub fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 64 { + u64x8::ZERO + } else { + simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u64x8())) + } } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18750,13 +19931,15 @@ pub unsafe fn _mm512_mask_slli_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm512_setzero_si512() - } else { - let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)); - transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) +pub fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) + } } } @@ -18768,18 +19951,16 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_slli_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 64 { - u64x4::ZERO - } else { - simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)) - }; - transmute(simd_select_bitmask(k, r, src.as_u64x4())) +pub fn _mm256_mask_slli_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x4::ZERO + } else { + simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x4())) + } } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18790,13 +19971,15 @@ pub unsafe fn _mm256_mask_slli_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm256_setzero_si256() - } else { - let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)); - transmute(simd_select_bitmask(k, r, u64x4::ZERO)) +pub fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x4::ZERO)) + } } } @@ -18808,18 +19991,16 @@ pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_slli_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 64 { - u64x2::ZERO - } else { - simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)) - }; - transmute(simd_select_bitmask(k, r, src.as_u64x2())) +pub fn _mm_mask_slli_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x2::ZERO + } else { + simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x2())) + } } /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18830,13 +20011,15 @@ pub unsafe fn _mm_mask_slli_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm_setzero_si128() - } else { - let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)); - transmute(simd_select_bitmask(k, r, u64x2::ZERO)) +pub fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm_setzero_si128() + } else { + let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x2::ZERO)) + } } } @@ -18848,12 +20031,14 @@ pub unsafe fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_srli_epi64(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm512_setzero_si512() - } else { - transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))) +pub fn _mm512_srli_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))) + } } } @@ -18865,18 +20050,16 @@ pub unsafe fn _mm512_srli_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_srli_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = if IMM8 >= 64 { - u64x8::ZERO - } else { - simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)) - }; - transmute(simd_select_bitmask(k, shf, src.as_u64x8())) +pub fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = if IMM8 >= 64 { + u64x8::ZERO + } else { + simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, shf, src.as_u64x8())) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18887,13 +20070,15 @@ pub unsafe fn _mm512_mask_srli_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm512_setzero_si512() - } else { - let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)); - transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) +pub fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm512_setzero_si512() + } else { + let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) + } } } @@ -18905,18 +20090,16 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_srli_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 64 { - u64x4::ZERO - } else { - simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)) - }; - transmute(simd_select_bitmask(k, r, src.as_u64x4())) +pub fn _mm256_mask_srli_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x4::ZERO + } else { + simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x4())) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -18927,13 +20110,15 @@ pub unsafe fn _mm256_mask_srli_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm256_setzero_si256() - } else { - let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)); - transmute(simd_select_bitmask(k, r, u64x4::ZERO)) +pub fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm256_setzero_si256() + } else { + let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x4::ZERO)) + } } } @@ -18945,18 +20130,16 @@ pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_srli_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = if IMM8 >= 64 { - u64x2::ZERO - } else { - simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)) - }; - transmute(simd_select_bitmask(k, r, src.as_u64x2())) +pub fn _mm_mask_srli_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = if IMM8 >= 64 { + u64x2::ZERO + } else { + simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)) + }; + transmute(simd_select_bitmask(k, r, src.as_u64x2())) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -18967,13 +20150,15 @@ pub unsafe fn _mm_mask_srli_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm_setzero_si128() - } else { - let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)); - transmute(simd_select_bitmask(k, r, u64x2::ZERO)) +pub fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + if IMM8 >= 64 { + _mm_setzero_si128() + } else { + let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)); + transmute(simd_select_bitmask(k, r, u64x2::ZERO)) + } } } @@ -18984,8 +20169,8 @@ pub unsafe fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { - transmute(vpslld(a.as_i32x16(), count.as_i32x4())) +pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -18995,14 +20180,11 @@ pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm512_mask_sll_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_sll_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19012,9 +20194,11 @@ pub unsafe fn _mm512_mask_sll_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_sll_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19024,14 +20208,11 @@ pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm256_mask_sll_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_sll_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) +pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19041,9 +20222,11 @@ pub unsafe fn _mm256_mask_sll_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_sll_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
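(Reading aid, not part of the patch: every `mask_`/`maskz_` pair above ends in the same `simd_select_bitmask` step described by the doc comments — copy the lane from `src` when its mask bit is clear, or zero it. A scalar sketch of that selection for the 16-lane case, with a hypothetical helper name.)

fn select_bitmask_epi32(k: u16, computed: [i32; 16], src: [i32; 16]) -> [i32; 16] {
    let mut dst = src; // writemask behaviour: unselected lanes keep the `src` value
    for i in 0..16 {
        if (k >> i) & 1 == 1 {
            dst[i] = computed[i];
        }
    }
    dst
}

// The `maskz_` variants perform the same selection with `src` replaced by an all-zero vector.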
@@ -19053,9 +20236,11 @@ pub unsafe fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sll_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) +pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19065,9 +20250,11 @@ pub unsafe fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpslld))] -pub unsafe fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sll_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. @@ -19077,8 +20264,8 @@ pub unsafe fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) +pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19088,14 +20275,11 @@ pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm512_mask_srl_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_srl_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19105,9 +20289,11 @@ pub unsafe fn _mm512_mask_srl_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_srl_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19117,14 +20303,11 @@ pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm256_mask_srl_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_srl_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) +pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19134,9 +20317,11 @@ pub unsafe fn _mm256_mask_srl_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_srl_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19146,9 +20331,11 @@ pub unsafe fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srl_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) +pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19158,9 +20345,11 @@ pub unsafe fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrld))] -pub unsafe fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srl_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. @@ -19170,8 +20359,8 @@ pub unsafe fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) +pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19181,14 +20370,11 @@ pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm512_mask_sll_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_sll_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19198,9 +20384,11 @@ pub unsafe fn _mm512_mask_sll_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_sll_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sll_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -19210,14 +20398,11 @@ pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm256_mask_sll_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_sll_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19227,9 +20412,11 @@ pub unsafe fn _mm256_mask_sll_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_sll_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sll_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19239,9 +20426,11 @@ pub unsafe fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sll_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19251,9 +20440,11 @@ pub unsafe fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllq))] -pub unsafe fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sll_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sll_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst. 
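(Hypothetical caller-side sketch for the shift-by-count intrinsics above; not part of the patch. It assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]`, an x86_64 target, and that, per the underlying vpsllq instruction, only the low 64 bits of the count vector supply the shift amount for every lane.)

#[cfg(target_arch = "x86_64")]
mod shift_demo {
    use std::arch::x86_64::*;

    /// Shifts every 64-bit lane of `data` left by `count` bits, or returns `None`
    /// when the running CPU lacks AVX-512F.
    pub fn shift_left_by(data: __m512i, count: i64) -> Option<__m512i> {
        if !is_x86_feature_detected!("avx512f") {
            return None;
        }
        // Safety: the runtime check above guarantees the `avx512f` target feature.
        unsafe { Some(shift_left_avx512f(data, count)) }
    }

    #[target_feature(enable = "avx512f")]
    unsafe fn shift_left_avx512f(data: __m512i, count: i64) -> __m512i {
        // The count operand is a vector; the instruction reads only its low 64 bits.
        _mm512_sll_epi64(data, _mm_set_epi64x(0, count))
    }
}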
@@ -19263,8 +20454,8 @@ pub unsafe fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) +pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19274,14 +20465,11 @@ pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm512_mask_srl_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_srl_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19291,9 +20479,11 @@ pub unsafe fn _mm512_mask_srl_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_srl_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19303,14 +20493,11 @@ pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm256_mask_srl_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_srl_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19320,9 +20507,11 @@ pub unsafe fn _mm256_mask_srl_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_srl_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_srl_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19332,9 +20521,11 @@ pub unsafe fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srl_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19344,9 +20535,11 @@ pub unsafe fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub unsafe fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srl_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srl_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -19356,8 +20549,8 @@ pub unsafe fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) +pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -19367,14 +20560,11 @@ pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm512_mask_sra_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_sra_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19384,9 +20574,11 @@ pub unsafe fn _mm512_mask_sra_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_sra_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19396,14 +20588,11 @@ pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm256_mask_sra_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_sra_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) +pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19413,9 +20602,11 @@ pub unsafe fn _mm256_mask_sra_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_sra_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
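(Illustrative contrast, not part of the patch: the `sra` intrinsics above differ from the `srl` family only in what gets shifted in — sign bits instead of zeros. A one-lane scalar model with hypothetical helper names, restricted to in-range counts.)

fn sra_lane(x: i32, count: u32) -> i32 {
    debug_assert!(count < 32);
    x >> count // arithmetic shift on i32: vacated bits take the sign bit's value
}

fn srl_lane(x: i32, count: u32) -> i32 {
    debug_assert!(count < 32);
    ((x as u32) >> count) as i32 // logical shift: vacated bits become zero
}

// For example, sra_lane(-16, 2) == -4 while srl_lane(-16, 2) == 0x3FFF_FFFC.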
@@ -19425,9 +20616,11 @@ pub unsafe fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sra_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) +pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19437,9 +20630,11 @@ pub unsafe fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad))] -pub unsafe fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sra_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -19449,8 +20644,8 @@ pub unsafe fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { - transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) +pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { + unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19460,14 +20655,11 @@ pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm512_mask_sra_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m128i, -) -> __m512i { - let shf = _mm512_sra_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19477,9 +20669,11 @@ pub unsafe fn _mm512_mask_sra_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { - let shf = _mm512_sra_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + unsafe { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -19489,8 +20683,8 @@ pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { - transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) +pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { + unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19500,14 +20694,11 @@ pub unsafe fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm256_mask_sra_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m128i, -) -> __m256i { - let shf = _mm256_sra_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19517,9 +20708,11 @@ pub unsafe fn _mm256_mask_sra_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { - let shf = _mm256_sra_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { + unsafe { + let shf = _mm256_sra_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. 
@@ -19529,8 +20722,8 @@ pub unsafe fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { - transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) +pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19540,9 +20733,11 @@ pub unsafe fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sra_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19552,9 +20747,11 @@ pub unsafe fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq))] -pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sra_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sra_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -19565,9 +20762,11 @@ pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_srai_epi32(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32))) +pub fn _mm512_srai_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32))) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
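// The srai bodies here clamp with `IMM8.min(31)` before calling `simd_shr`:
// a shift by the full lane width would be out of range for the generic shift
// intrinsic, while the hardware behaviour of vpsrad with a large immediate is
// plain sign-fill, which a shift by 31 already produces for a 32-bit lane.
// A one-lane sketch (illustrative only, not part of the patch):
fn srai_epi32_lane(x: i32, imm8: u32) -> i32 {
    x >> imm8.min(31)
}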
@@ -19578,14 +20777,12 @@ pub unsafe fn _mm512_srai_epi32(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_srai_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19596,10 +20793,12 @@ pub unsafe fn _mm512_mask_srai_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19610,13 +20809,11 @@ pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_srai_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19627,9 +20824,11 @@ pub unsafe fn _mm256_mask_srai_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) -> __m256i { - let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19640,13 +20839,11 @@ pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_srai_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19657,9 +20854,11 @@ pub unsafe fn _mm_mask_srai_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> __m128i { - let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -19670,9 +20869,11 @@ pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_srai_epi64(a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64))) +pub fn _mm512_srai_epi64(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64))) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
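// The mask_srai/maskz_srai hunks compose the clamp above with the mask select:
// shift every lane by the const-generic IMM8, then let `k` decide lane by lane.
// A four-lane scalar sketch (illustrative only, not part of the patch):
fn mask_srai_epi32_model<const IMM8: u32>(src: [i32; 4], k: u8, a: [i32; 4]) -> [i32; 4] {
    let mut out = src;
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            out[i] = a[i] >> IMM8.min(31);
        }
    }
    out
}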
@@ -19683,14 +20884,12 @@ pub unsafe fn _mm512_srai_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_srai_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19701,10 +20900,12 @@ pub unsafe fn _mm512_mask_srai_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -19715,9 +20916,11 @@ pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_srai_epi64(a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64))) +pub fn _mm256_srai_epi64(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64))) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19728,14 +20931,12 @@ pub unsafe fn _mm256_srai_epi64(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_srai_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_srai_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19746,10 +20947,12 @@ pub unsafe fn _mm256_mask_srai_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_srai_epi64(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_srai_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -19760,9 +20963,11 @@ pub unsafe fn _mm256_maskz_srai_epi64(k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_srai_epi64(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64))) +pub fn _mm_srai_epi64(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64))) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19773,14 +20978,12 @@ pub unsafe fn _mm_srai_epi64(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_srai_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_srai_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19791,10 +20994,12 @@ pub unsafe fn _mm_mask_srai_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
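// A caller-side sketch of what making these signatures safe buys (assumptions:
// nightly toolchain with the `stdarch_x86_avx512` feature enabled at the crate
// root, `use core::arch::x86_64::*;` in scope, x86_64 target). Inside a
// function that already enables the same target features, the masked shift can
// now be called without wrapping the call in an `unsafe` block; the shift
// amount is the const generic implied by #[rustc_legacy_const_generics(3)].
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn srai_epi64_by_two_masked(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    _mm_mask_srai_epi64::<2>(src, k, a)
}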
@@ -19804,8 +21009,8 @@ pub unsafe fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) +pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19815,14 +21020,11 @@ pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm512_mask_srav_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srav_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19832,9 +21034,11 @@ pub unsafe fn _mm512_mask_srav_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srav_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19844,14 +21048,11 @@ pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm256_mask_srav_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_srav_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) +pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19861,9 +21062,11 @@ pub unsafe fn _mm256_mask_srav_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_srav_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19873,14 +21076,11 @@ pub unsafe fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm_mask_srav_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_srav_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) +pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19890,9 +21090,11 @@ pub unsafe fn _mm_mask_srav_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srav_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -19902,8 +21104,8 @@ pub unsafe fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) +pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -19913,14 +21115,11 @@ pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm512_mask_srav_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srav_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -19930,9 +21129,11 @@ pub unsafe fn _mm512_mask_srav_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srav_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -19942,8 +21143,8 @@ pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i { - transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) +pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i { + unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19953,14 +21154,11 @@ pub unsafe fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm256_mask_srav_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_srav_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -19970,9 +21168,11 @@ pub unsafe fn _mm256_mask_srav_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_srav_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srav_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -19982,8 +21182,8 @@ pub unsafe fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i { - transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) +pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19993,14 +21193,11 @@ pub unsafe fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm_mask_srav_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_srav_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20010,9 +21207,11 @@ pub unsafe fn _mm_mask_srav_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srav_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srav_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. 
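// A per-lane scalar model of the vpsravd/vpsravq variable arithmetic shifts
// above (illustrative sketch, not part of the patch): each lane carries its
// own count, and counts of at least the lane width degrade to pure sign-fill
// (use 63 instead of 31 for the epi64 forms).
fn srav_epi32_lanes(a: [i32; 4], count: [u32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    for i in 0..4 {
        out[i] = if count[i] > 31 { a[i] >> 31 } else { a[i] >> count[i] };
    }
    out
}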
@@ -20022,8 +21221,8 @@ pub unsafe fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20033,14 +21232,11 @@ pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm512_mask_rolv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let rol = _mm512_rolv_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, rol, src.as_i32x16())) +pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20050,9 +21246,11 @@ pub unsafe fn _mm512_mask_rolv_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let rol = _mm512_rolv_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, rol, i32x16::ZERO)) +pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, i32x16::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20062,8 +21260,8 @@ pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -20073,9 +21271,11 @@ pub unsafe fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let rol = _mm256_rolv_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, rol, src.as_i32x8())) +pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let rol = _mm256_rolv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, rol, src.as_i32x8())) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20085,9 +21285,11 @@ pub unsafe fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let rol = _mm256_rolv_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, rol, i32x8::ZERO)) +pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let rol = _mm256_rolv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, rol, i32x8::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20097,8 +21299,8 @@ pub unsafe fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20108,9 +21310,11 @@ pub unsafe fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let rol = _mm_rolv_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, rol, src.as_i32x4())) +pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let rol = _mm_rolv_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, rol, src.as_i32x4())) + } } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20120,9 +21324,11 @@ pub unsafe fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let rol = _mm_rolv_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, rol, i32x4::ZERO)) +pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let rol = _mm_rolv_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, rol, i32x4::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20132,8 +21338,8 @@ pub unsafe fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20143,14 +21349,11 @@ pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm512_mask_rorv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let ror = _mm512_rorv_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, ror, src.as_i32x16())) +pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20160,9 +21363,11 @@ pub unsafe fn _mm512_mask_rorv_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let ror = _mm512_rorv_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, ror, i32x16::ZERO)) +pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, i32x16::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
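// A scalar model of one 32-bit lane of the vprolvd/vprorvd hunks above
// (illustrative sketch, not part of the patch): the rotate amount is taken
// modulo the lane width, matching how `rotate_left`/`rotate_right` treat
// oversized counts.
fn rolv_epi32_lane(a: u32, b: u32) -> u32 {
    a.rotate_left(b % 32)
}

fn rorv_epi32_lane(a: u32, b: u32) -> u32 {
    a.rotate_right(b % 32)
}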
@@ -20172,8 +21377,8 @@ pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20183,9 +21388,11 @@ pub unsafe fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let ror = _mm256_rorv_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, ror, src.as_i32x8())) +pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let ror = _mm256_rorv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, ror, src.as_i32x8())) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20195,9 +21402,11 @@ pub unsafe fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let ror = _mm256_rorv_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, ror, i32x8::ZERO)) +pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let ror = _mm256_rorv_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, ror, i32x8::ZERO)) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20207,8 +21416,8 @@ pub unsafe fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -20218,9 +21427,11 @@ pub unsafe fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let ror = _mm_rorv_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, ror, src.as_i32x4())) +pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let ror = _mm_rorv_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, ror, src.as_i32x4())) + } } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20230,9 +21441,11 @@ pub unsafe fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let ror = _mm_rorv_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, ror, i32x4::ZERO)) +pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let ror = _mm_rorv_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, ror, i32x4::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20242,8 +21455,8 @@ pub unsafe fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20253,9 +21466,11 @@ pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let rol = _mm512_rolv_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, rol, src.as_i64x8())) +pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, rol, src.as_i64x8())) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20265,9 +21480,11 @@ pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let rol = _mm512_rolv_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, rol, i64x8::ZERO)) +pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, rol, i64x8::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20277,8 +21494,8 @@ pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20288,9 +21505,11 @@ pub unsafe fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let rol = _mm256_rolv_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, rol, src.as_i64x4())) +pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let rol = _mm256_rolv_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, rol, src.as_i64x4())) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20300,9 +21519,11 @@ pub unsafe fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let rol = _mm256_rolv_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, rol, i64x4::ZERO)) +pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let rol = _mm256_rolv_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, rol, i64x4::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. 
@@ -20312,8 +21533,8 @@ pub unsafe fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20323,9 +21544,11 @@ pub unsafe fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let rol = _mm_rolv_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, rol, src.as_i64x2())) +pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let rol = _mm_rolv_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, rol, src.as_i64x2())) + } } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20335,9 +21558,11 @@ pub unsafe fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let rol = _mm_rolv_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, rol, i64x2::ZERO)) +pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let rol = _mm_rolv_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, rol, i64x2::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20347,8 +21572,8 @@ pub unsafe fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
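// The 64-bit rolv/rorv hunks mirror the 32-bit ones with a 64-bit lane width,
// and rotating right is just the complement of rotating left, which is why the
// two families read identically. A one-lane sketch (illustrative only):
fn rorv_epi64_lane(a: u64, b: u64) -> u64 {
    let n = (b % 64) as u32;
    debug_assert_eq!(a.rotate_right(n), a.rotate_left((64 - n) % 64));
    a.rotate_right(n)
}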
@@ -20358,9 +21583,11 @@ pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let ror = _mm512_rorv_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, ror, src.as_i64x8())) +pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, ror, src.as_i64x8())) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20370,9 +21597,11 @@ pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let ror = _mm512_rorv_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, ror, i64x8::ZERO)) +pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, ror, i64x8::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20382,8 +21611,8 @@ pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20393,9 +21622,11 @@ pub unsafe fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let ror = _mm256_rorv_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, ror, src.as_i64x4())) +pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let ror = _mm256_rorv_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, ror, src.as_i64x4())) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20405,9 +21636,11 @@ pub unsafe fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let ror = _mm256_rorv_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, ror, i64x4::ZERO)) +pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let ror = _mm256_rorv_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, ror, i64x4::ZERO)) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20417,8 +21650,8 @@ pub unsafe fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20428,9 +21661,11 @@ pub unsafe fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let ror = _mm_rorv_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, ror, src.as_i64x2())) +pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let ror = _mm_rorv_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, ror, src.as_i64x2())) + } } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20440,9 +21675,11 @@ pub unsafe fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let ror = _mm_rorv_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, ror, i64x2::ZERO)) +pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let ror = _mm_rorv_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, ror, i64x2::ZERO)) + } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. 
@@ -20452,8 +21689,8 @@ pub unsafe fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) +pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20463,14 +21700,11 @@ pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm512_mask_sllv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_sllv_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20480,9 +21714,11 @@ pub unsafe fn _mm512_mask_sllv_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_sllv_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20492,14 +21728,11 @@ pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm256_mask_sllv_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_sllv_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) +pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20509,9 +21742,11 @@ pub unsafe fn _mm256_mask_sllv_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_sllv_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20521,14 +21756,11 @@ pub unsafe fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm_mask_sllv_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_sllv_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) +pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20538,9 +21770,11 @@ pub unsafe fn _mm_mask_sllv_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sllv_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -20550,8 +21784,8 @@ pub unsafe fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) +pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -20561,14 +21795,11 @@ pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm512_mask_srlv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srlv_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20578,9 +21809,11 @@ pub unsafe fn _mm512_mask_srlv_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srlv_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20590,14 +21823,11 @@ pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm256_mask_srlv_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_srlv_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) +pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20607,9 +21837,11 @@ pub unsafe fn _mm256_mask_srlv_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_srlv_epi32(a, count).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi32(a, count).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20619,14 +21851,11 @@ pub unsafe fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm_mask_srlv_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_srlv_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) +pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20636,9 +21865,11 @@ pub unsafe fn _mm_mask_srlv_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srlv_epi32(a, count).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi32(a, count).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -20648,8 +21879,8 @@ pub unsafe fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) +pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
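The vpsllvd/vpsrlvd hunks above have the same masking structure, but the shift semantics differ from the rotate-variable forms in one way worth noting: a per-lane count of 32 or more does not wrap, it shifts every bit out and the lane becomes 0. A scalar sketch of the zeromask right-shift case (names are illustrative, not the crate's code):

// Scalar model of the zeromask variable right shift converted above (illustrative only).
fn srlv_epi32_maskz_model(k: u16, a: [u32; 16], count: [u32; 16]) -> [u32; 16] {
    let mut out = [0u32; 16];
    for i in 0..16 {
        // a count of 32 or more shifts the whole lane out
        let shifted = if count[i] < 32 { a[i] >> count[i] } else { 0 };
        // zeromask: lanes with a clear mask bit become 0
        out[i] = if (k >> i) & 1 == 1 { shifted } else { 0 };
    }
    out
}

The left-shift forms are identical except for the direction of the shift.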
@@ -20659,14 +21890,11 @@ pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm512_mask_sllv_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_sllv_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20676,9 +21904,11 @@ pub unsafe fn _mm512_mask_sllv_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_sllv_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_sllv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20688,14 +21918,11 @@ pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm256_mask_sllv_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_sllv_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20705,9 +21932,11 @@ pub unsafe fn _mm256_mask_sllv_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_sllv_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_sllv_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20717,14 +21946,11 @@ pub unsafe fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm_mask_sllv_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_sllv_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20734,9 +21960,11 @@ pub unsafe fn _mm_mask_sllv_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_sllv_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_sllv_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -20746,8 +21974,8 @@ pub unsafe fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) +pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { + unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -20757,14 +21985,11 @@ pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm512_mask_srlv_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srlv_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20774,9 +21999,11 @@ pub unsafe fn _mm512_mask_srlv_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srlv_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + unsafe { + let shf = _mm512_srlv_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20786,14 +22013,11 @@ pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm256_mask_srlv_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - count: __m256i, -) -> __m256i { - let shf = _mm256_srlv_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) +pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -20803,9 +22027,11 @@ pub unsafe fn _mm256_mask_srlv_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { - let shf = _mm256_srlv_epi64(a, count).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { + unsafe { + let shf = _mm256_srlv_epi64(a, count).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20815,14 +22041,11 @@ pub unsafe fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm_mask_srlv_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, - count: __m128i, -) -> __m128i { - let shf = _mm_srlv_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) +pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20832,9 +22055,11 @@ pub unsafe fn _mm_mask_srlv_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { - let shf = _mm_srlv_epi64(a, count).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { + unsafe { + let shf = _mm_srlv_epi64(a, count).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. 
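Every _mask_/_maskz_ wrapper in these shift and rotate hunks ends in the same simd_select_bitmask(k, computed, fallback) call: bit i of the mask picks the freshly computed lane, a clear bit picks the fallback, which is src for the _mask_ forms and an all-zero vector for the _maskz_ forms. A scalar sketch of just that selection step (illustrative only, not the compiler intrinsic itself):

// Scalar analogue of the simd_select_bitmask(k, computed, fallback) pattern (illustrative).
fn select_bitmask_i64x8_model(k: u8, computed: [i64; 8], fallback: [i64; 8]) -> [i64; 8] {
    let mut out = [0i64; 8];
    for i in 0..8 {
        out[i] = if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] };
    }
    out
}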
@@ -20845,30 +22070,32 @@ pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - a, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11), - ((MASK as u32 >> 6) & 0b11), - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 4, - ((MASK as u32 >> 6) & 0b11) + 4, - (MASK as u32 & 0b11) + 8, - ((MASK as u32 >> 2) & 0b11) + 8, - ((MASK as u32 >> 4) & 0b11) + 8, - ((MASK as u32 >> 6) & 0b11) + 8, - (MASK as u32 & 0b11) + 12, - ((MASK as u32 >> 2) & 0b11) + 12, - ((MASK as u32 >> 4) & 0b11) + 12, - ((MASK as u32 >> 6) & 0b11) + 12, - ], - ) +pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20879,14 +22106,12 @@ pub unsafe fn _mm512_permute_ps(a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_permute_ps<const MASK: i32>( - src: __m512, - k: __mmask16, - a: __m512, -) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_permute_ps::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) +pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_ps::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -20897,10 +22122,12 @@ pub unsafe fn _mm512_mask_permute_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_permute_ps::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) +pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_ps::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20911,13 +22138,11 @@ pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_permute_ps<const MASK: i32>( - src: __m256, - k: __mmask8, - a: __m256, -) -> __m256 { - let r = _mm256_permute_ps::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) +pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let r = _mm256_permute_ps::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -20928,9 +22153,11 @@ pub unsafe fn _mm256_mask_permute_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 { - let r = _mm256_permute_ps::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let r = _mm256_permute_ps::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20941,9 +22168,11 @@ pub unsafe fn _mm256_maskz_permute_ps(k: __mmask8, a: __m256) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 { - let r = _mm_permute_ps::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) +pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let r = _mm_permute_ps::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -20954,9 +22183,11 @@ pub unsafe fn _mm_mask_permute_ps(src: __m128, k: __mmask8, a: #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 { - let r = _mm_permute_ps::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let r = _mm_permute_ps::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. @@ -20967,22 +22198,24 @@ pub unsafe fn _mm_maskz_permute_ps(k: __mmask8, a: __m128) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - a, - [ - MASK as u32 & 0b1, - ((MASK as u32 >> 1) & 0b1), - ((MASK as u32 >> 2) & 0b1) + 2, - ((MASK as u32 >> 3) & 0b1) + 2, - ((MASK as u32 >> 4) & 0b1) + 4, - ((MASK as u32 >> 5) & 0b1) + 4, - ((MASK as u32 >> 6) & 0b1) + 6, - ((MASK as u32 >> 7) & 0b1) + 6, - ], - ) +pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1), + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 2, + ((MASK as u32 >> 4) & 0b1) + 4, + ((MASK as u32 >> 5) & 0b1) + 4, + ((MASK as u32 >> 6) & 0b1) + 6, + ((MASK as u32 >> 7) & 0b1) + 6, + ], + ) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20993,14 +22226,12 @@ pub unsafe fn _mm512_permute_pd(a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_permute_pd<const MASK: i32>( - src: __m512d, - k: __mmask8, - a: __m512d, -) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_permute_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) +pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -21011,10 +22242,12 @@ pub unsafe fn _mm512_mask_permute_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_permute_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) +pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permute_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21025,14 +22258,12 @@ pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_permute_pd<const MASK: i32>( - src: __m256d, - k: __mmask8, - a: __m256d, -) -> __m256d { - static_assert_uimm_bits!(MASK, 4); - let r = _mm256_permute_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) +pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 4); + let r = _mm256_permute_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21043,10 +22274,12 @@ pub unsafe fn _mm256_mask_permute_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d { - static_assert_uimm_bits!(MASK, 4); - let r = _mm256_permute_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) +pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 4); + let r = _mm256_permute_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21057,14 +22290,12 @@ pub unsafe fn _mm256_maskz_permute_pd(k: __mmask8, a: __m256d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_permute_pd<const IMM2: i32>( - src: __m128d, - k: __mmask8, - a: __m128d, -) -> __m128d { - static_assert_uimm_bits!(IMM2, 2); - let r = _mm_permute_pd::<IMM2>(a); - transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) +pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm_permute_pd::<IMM2>(a); + transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21075,10 +22306,12 @@ pub unsafe fn _mm_mask_permute_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d { - static_assert_uimm_bits!(IMM2, 2); - let r = _mm_permute_pd::<IMM2>(a); - transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) +pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm_permute_pd::<IMM2>(a); + transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) + } } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -21089,22 +22322,24 @@ pub unsafe fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - a, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11), - ((MASK as u32 >> 6) & 0b11), - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 4, - ((MASK as u32 >> 6) & 0b11) + 4, - ], - ) +pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ) + } } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
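In the permute_ps and permute_pd conversions above, the immediate is decoded at compile time into the index array handed to simd_shuffle!: permute_ps reads MASK as four 2-bit selectors that are reapplied to every 128-bit lane (hence the +4/+8/+12 offsets), while permute_pd reads one selector bit per element (the +2/+4/+6 offsets keep each pair inside its own 128-bit lane). A small sketch of the _ps index derivation, mirroring the array literal in the hunk (illustrative, not the crate's code):

// How the 16 shuffle indices of the 512-bit permute_ps are derived from an 8-bit MASK (illustrative).
fn permute_ps_indices_model(mask: u8) -> [u32; 16] {
    let mut idx = [0u32; 16];
    for lane in 0..4usize {
        for elem in 0..4usize {
            let sel = (u32::from(mask) >> (2 * elem)) & 0b11; // 2-bit selector per element
            idx[4 * lane + elem] = sel + 4 * lane as u32;     // stay inside the same 128-bit lane
        }
    }
    idx
}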
@@ -21115,14 +22350,16 @@ pub unsafe fn _mm512_permutex_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_permutex_epi64<const MASK: i32>( +pub fn _mm512_mask_permutex_epi64<const MASK: i32>( src: __m512i, k: __mmask8, a: __m512i, ) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_permutex_epi64::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permutex_epi64::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + } } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21133,10 +22370,12 @@ pub unsafe fn _mm512_mask_permutex_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_permutex_epi64::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) +pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_permutex_epi64::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) + } } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -21147,18 +22386,20 @@ pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - a, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11), - ((MASK as u32 >> 6) & 0b11), - ], - ) +pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + ], + ) + } } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21169,14 +22410,16 @@ pub unsafe fn _mm256_permutex_epi64(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_permutex_epi64<const MASK: i32>( +pub fn _mm256_mask_permutex_epi64<const MASK: i32>( src: __m256i, k: __mmask8, a: __m256i, ) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_permutex_epi64::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_epi64::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21187,10 +22430,12 @@ pub unsafe fn _mm256_mask_permutex_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_permutex_epi64::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) +pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_epi64::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -21201,22 +22446,24 @@ pub unsafe fn _mm256_maskz_permutex_epi64(k: __mmask8, a: __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - a, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11), - ((MASK as u32 >> 6) & 0b11), - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 4, - ((MASK as u32 >> 6) & 0b11) + 4, - ], - ) +pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21227,13 +22474,11 @@ pub unsafe fn _mm512_permutex_pd(a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_permutex_pd<const MASK: i32>( - src: __m512d, - k: __mmask8, - a: __m512d, -) -> __m512d { - let r = _mm512_permutex_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) +pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let r = _mm512_permutex_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21244,9 +22489,11 @@ pub unsafe fn _mm512_mask_permutex_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d { - let r = _mm512_permutex_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) +pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let r = _mm512_permutex_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -21257,18 +22504,20 @@ pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - a, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11), - ((MASK as u32 >> 6) & 0b11), - ], - ) +pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + a, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11), + ((MASK as u32 >> 6) & 0b11), + ], + ) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21279,14 +22528,12 @@ pub unsafe fn _mm256_permutex_pd(a: __m256d) -> __m256d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_permutex_pd<const MASK: i32>( - src: __m256d, - k: __mmask8, - a: __m256d, -) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_permutex_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) +pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21297,10 +22544,12 @@ pub unsafe fn _mm256_mask_permutex_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_permutex_pd::<MASK>(a); - transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) +pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_permutex_pd::<MASK>(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name. @@ -21310,8 +22559,8 @@ pub unsafe fn _mm256_maskz_permutex_pd(k: __mmask8, a: __m256d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermd -pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i { - transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) +pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
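permutex_epi64 and permutex_pd reuse the same four 2-bit selectors, but the unit being indexed is a 64-bit element inside a 256-bit lane, so the high lane's indices are simply offset by 4 (and the 256-bit forms work on a single lane). A scalar sketch of the element selection (illustrative names only, not the crate's code):

// Scalar model of the 512-bit permutex_epi64: select within each 256-bit lane (illustrative).
fn permutex_epi64_model(a: [i64; 8], mask: u8) -> [i64; 8] {
    let mut out = [0i64; 8];
    for lane in 0..2usize {
        for elem in 0..4usize {
            let sel = (usize::from(mask) >> (2 * elem)) & 0b11; // 2-bit selector
            out[4 * lane + elem] = a[4 * lane + sel];           // never crosses the 256-bit lane
        }
    }
    out
}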
@@ -21321,14 +22570,16 @@ pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermd))] -pub unsafe fn _mm512_mask_permutevar_epi32( +pub fn _mm512_mask_permutevar_epi32( src: __m512i, k: __mmask16, idx: __m512i, a: __m512i, ) -> __m512i { - let permute = _mm512_permutevar_epi32(idx, a).as_i32x16(); - transmute(simd_select_bitmask(k, permute, src.as_i32x16())) + unsafe { + let permute = _mm512_permutevar_epi32(idx, a).as_i32x16(); + transmute(simd_select_bitmask(k, permute, src.as_i32x16())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst. @@ -21338,8 +22589,8 @@ pub unsafe fn _mm512_mask_permutevar_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 { - transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) +pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 { + unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21349,14 +22600,11 @@ pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm512_mask_permutevar_ps( - src: __m512, - k: __mmask16, - a: __m512, - b: __m512i, -) -> __m512 { - let permute = _mm512_permutevar_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, permute, src.as_f32x16())) +pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 { + unsafe { + let permute = _mm512_permutevar_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, src.as_f32x16())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21366,9 +22614,11 @@ pub unsafe fn _mm512_mask_permutevar_ps( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 { - let permute = _mm512_permutevar_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) +pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 { + unsafe { + let permute = _mm512_permutevar_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21378,9 +22628,11 @@ pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 { - let permute = _mm256_permutevar_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, permute, src.as_f32x8())) +pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 { + unsafe { + let permute = _mm256_permutevar_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, src.as_f32x8())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21390,9 +22642,11 @@ pub unsafe fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 { - let permute = _mm256_permutevar_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) +pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 { + unsafe { + let permute = _mm256_permutevar_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21402,9 +22656,11 @@ pub unsafe fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 { - let permute = _mm_permutevar_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, permute, src.as_f32x4())) +pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 { + unsafe { + let permute = _mm_permutevar_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, src.as_f32x4())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -21414,9 +22670,11 @@ pub unsafe fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilps))] -pub unsafe fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 { - let permute = _mm_permutevar_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) +pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 { + unsafe { + let permute = _mm_permutevar_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst. @@ -21426,8 +22684,8 @@ pub unsafe fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d { - transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) +pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d { + unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21437,14 +22695,11 @@ pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm512_mask_permutevar_pd( - src: __m512d, - k: __mmask8, - a: __m512d, - b: __m512i, -) -> __m512d { - let permute = _mm512_permutevar_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, permute, src.as_f64x8())) +pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d { + unsafe { + let permute = _mm512_permutevar_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, src.as_f64x8())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21454,9 +22709,11 @@ pub unsafe fn _mm512_mask_permutevar_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d { - let permute = _mm512_permutevar_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) +pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d { + unsafe { + let permute = _mm512_permutevar_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21466,14 +22723,11 @@ pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm256_mask_permutevar_pd( - src: __m256d, - k: __mmask8, - a: __m256d, - b: __m256i, -) -> __m256d { - let permute = _mm256_permutevar_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, permute, src.as_f64x4())) +pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d { + unsafe { + let permute = _mm256_permutevar_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, src.as_f64x4())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21483,9 +22737,11 @@ pub unsafe fn _mm256_mask_permutevar_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d { - let permute = _mm256_permutevar_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) +pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d { + unsafe { + let permute = _mm256_permutevar_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21495,9 +22751,11 @@ pub unsafe fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d { - let permute = _mm_permutevar_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, permute, src.as_f64x2())) +pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d { + unsafe { + let permute = _mm_permutevar_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, src.as_f64x2())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21507,9 +22765,11 @@ pub unsafe fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermilpd))] -pub unsafe fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d { - let permute = _mm_permutevar_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) +pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d { + unsafe { + let permute = _mm_permutevar_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) + } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
@@ -21519,8 +22779,8 @@ pub unsafe fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermd -pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i { - transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) +pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21530,14 +22790,16 @@ pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermd))] -pub unsafe fn _mm512_mask_permutexvar_epi32( +pub fn _mm512_mask_permutexvar_epi32( src: __m512i, k: __mmask16, idx: __m512i, a: __m512i, ) -> __m512i { - let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); - transmute(simd_select_bitmask(k, permute, src.as_i32x16())) + unsafe { + let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); + transmute(simd_select_bitmask(k, permute, src.as_i32x16())) + } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21547,9 +22809,11 @@ pub unsafe fn _mm512_mask_permutexvar_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermd))] -pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i { - let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); - transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) +pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); + transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) + } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
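// Editorial sketch, not part of the patch: with `_mm512_permutexvar_epi32` and
// its masked forms now safe, a caller whose own `#[target_feature]` set covers
// "avx512f" can invoke them without an `unsafe` block (per the stabilized
// target_feature_11 rules); other callers still need `unsafe` to assert CPU
// support. The helper below is illustrative only.
#[target_feature(enable = "avx512f")]
fn broadcast_lane0(a: __m512i) -> __m512i {
    // An all-zero index vector selects element 0 of `a` for every destination lane.
    let idx = _mm512_setzero_si512();
    _mm512_permutexvar_epi32(idx, a)
}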
@@ -21559,7 +22823,7 @@ pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m5 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermd -pub unsafe fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i { +pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i { _mm256_permutevar8x32_epi32(a, idx) // llvm use llvm.x86.avx2.permd } @@ -21570,14 +22834,16 @@ pub unsafe fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermd))] -pub unsafe fn _mm256_mask_permutexvar_epi32( +pub fn _mm256_mask_permutexvar_epi32( src: __m256i, k: __mmask8, idx: __m256i, a: __m256i, ) -> __m256i { - let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); - transmute(simd_select_bitmask(k, permute, src.as_i32x8())) + unsafe { + let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); + transmute(simd_select_bitmask(k, permute, src.as_i32x8())) + } } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21587,9 +22853,11 @@ pub unsafe fn _mm256_mask_permutexvar_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermd))] -pub unsafe fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { - let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); - transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) +pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); + transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) + } } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -21599,8 +22867,8 @@ pub unsafe fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m25 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermq -pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i { - transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) +pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) } } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21610,14 +22878,16 @@ pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermq))] -pub unsafe fn _mm512_mask_permutexvar_epi64( +pub fn _mm512_mask_permutexvar_epi64( src: __m512i, k: __mmask8, idx: __m512i, a: __m512i, ) -> __m512i { - let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); - transmute(simd_select_bitmask(k, permute, src.as_i64x8())) + unsafe { + let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); + transmute(simd_select_bitmask(k, permute, src.as_i64x8())) + } } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21627,9 +22897,11 @@ pub unsafe fn _mm512_mask_permutexvar_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermq))] -pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i { - let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); - transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) +pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); + transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) + } } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -21639,8 +22911,8 @@ pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m51 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermq -pub unsafe fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i { - transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) +pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i { + unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) } } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21650,14 +22922,16 @@ pub unsafe fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermq))] -pub unsafe fn _mm256_mask_permutexvar_epi64( +pub fn _mm256_mask_permutexvar_epi64( src: __m256i, k: __mmask8, idx: __m256i, a: __m256i, ) -> __m256i { - let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); - transmute(simd_select_bitmask(k, permute, src.as_i64x4())) + unsafe { + let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); + transmute(simd_select_bitmask(k, permute, src.as_i64x4())) + } } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -21667,9 +22941,11 @@ pub unsafe fn _mm256_mask_permutexvar_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermq))] -pub unsafe fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { - let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); - transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) +pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); + transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx. @@ -21679,8 +22955,8 @@ pub unsafe fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m25 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermps))] -pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 { - transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) +pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 { + unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) } } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21690,14 +22966,11 @@ pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermps))] -pub unsafe fn _mm512_mask_permutexvar_ps( - src: __m512, - k: __mmask16, - idx: __m512i, - a: __m512, -) -> __m512 { - let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); - transmute(simd_select_bitmask(k, permute, src.as_f32x16())) +pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); + transmute(simd_select_bitmask(k, permute, src.as_f32x16())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21707,9 +22980,11 @@ pub unsafe fn _mm512_mask_permutexvar_ps( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermps))] -pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 { - let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); - transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) +pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx. 
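// Editorial sketch, not part of the patch: unlike the in-lane `permutevar`
// family, `permutexvar` indexes across the whole register. A scalar model of
// `_mm512_permutexvar_ps` (the mask/maskz forms then blend the result with
// `src` or zero exactly as the code above does):
fn permutexvar_ps_model(idx: [u32; 16], a: [f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..16 {
        dst[i] = a[(idx[i] & 0xF) as usize]; // only the low four index bits are used
    }
    dst
}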
@@ -21719,7 +22994,7 @@ pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermps))] -pub unsafe fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 { +pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 { _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps } @@ -21730,14 +23005,11 @@ pub unsafe fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermps))] -pub unsafe fn _mm256_mask_permutexvar_ps( - src: __m256, - k: __mmask8, - idx: __m256i, - a: __m256, -) -> __m256 { - let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); - transmute(simd_select_bitmask(k, permute, src.as_f32x8())) +pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); + transmute(simd_select_bitmask(k, permute, src.as_f32x8())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21747,9 +23019,11 @@ pub unsafe fn _mm256_mask_permutexvar_ps( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermps))] -pub unsafe fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 { - let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); - transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) +pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst. @@ -21759,8 +23033,8 @@ pub unsafe fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermpd))] -pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d { - transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) +pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d { + unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) } } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -21770,14 +23044,11 @@ pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermpd))] -pub unsafe fn _mm512_mask_permutexvar_pd( - src: __m512d, - k: __mmask8, - idx: __m512i, - a: __m512d, -) -> __m512d { - let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); - transmute(simd_select_bitmask(k, permute, src.as_f64x8())) +pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); + transmute(simd_select_bitmask(k, permute, src.as_f64x8())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21787,9 +23058,11 @@ pub unsafe fn _mm512_mask_permutexvar_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermpd))] -pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d { - let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); - transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) +pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst. @@ -21799,8 +23072,8 @@ pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermpd))] -pub unsafe fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d { - transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) +pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d { + unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) } } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21810,14 +23083,11 @@ pub unsafe fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermpd))] -pub unsafe fn _mm256_mask_permutexvar_pd( - src: __m256d, - k: __mmask8, - idx: __m256i, - a: __m256d, -) -> __m256d { - let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); - transmute(simd_select_bitmask(k, permute, src.as_f64x4())) +pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); + transmute(simd_select_bitmask(k, permute, src.as_f64x4())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -21827,9 +23097,11 @@ pub unsafe fn _mm256_mask_permutexvar_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermpd))] -pub unsafe fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d { - let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); - transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) +pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -21839,8 +23111,8 @@ pub unsafe fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d -pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { - transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) +pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -21850,14 +23122,16 @@ pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2d))] -pub unsafe fn _mm512_mask_permutex2var_epi32( +pub fn _mm512_mask_permutex2var_epi32( a: __m512i, k: __mmask16, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); - transmute(simd_select_bitmask(k, permute, a.as_i32x16())) + unsafe { + let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); + transmute(simd_select_bitmask(k, permute, a.as_i32x16())) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21867,14 +23141,16 @@ pub unsafe fn _mm512_mask_permutex2var_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d -pub unsafe fn _mm512_maskz_permutex2var_epi32( +pub fn _mm512_maskz_permutex2var_epi32( k: __mmask16, a: __m512i, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); - transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) + unsafe { + let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); + transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
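// Editorial sketch, not part of the patch: `permutex2var` treats `a` and `b`
// as a single 32-entry table. For the 512-bit epi32 form, bits 3:0 of each
// index choose the element and bit 4 chooses the source (`a` when clear, `b`
// when set); the masked forms blend as above. Scalar model:
fn permutex2var_epi32_model(a: [i32; 16], idx: [u32; 16], b: [i32; 16]) -> [i32; 16] {
    let mut dst = [0i32; 16];
    for i in 0..16 {
        let table = if idx[i] & 0x10 != 0 { &b } else { &a };
        dst[i] = table[(idx[i] & 0xF) as usize];
    }
    dst
}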
@@ -21884,14 +23160,16 @@ pub unsafe fn _mm512_maskz_permutex2var_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2d))] -pub unsafe fn _mm512_mask2_permutex2var_epi32( +pub fn _mm512_mask2_permutex2var_epi32( a: __m512i, idx: __m512i, k: __mmask16, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); - transmute(simd_select_bitmask(k, permute, idx.as_i32x16())) + unsafe { + let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); + transmute(simd_select_bitmask(k, permute, idx.as_i32x16())) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -21901,8 +23179,8 @@ pub unsafe fn _mm512_mask2_permutex2var_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d -pub unsafe fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { - transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) +pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -21912,14 +23190,16 @@ pub unsafe fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2d))] -pub unsafe fn _mm256_mask_permutex2var_epi32( +pub fn _mm256_mask_permutex2var_epi32( a: __m256i, k: __mmask8, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); - transmute(simd_select_bitmask(k, permute, a.as_i32x8())) + unsafe { + let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); + transmute(simd_select_bitmask(k, permute, a.as_i32x8())) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21929,14 +23209,16 @@ pub unsafe fn _mm256_mask_permutex2var_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d -pub unsafe fn _mm256_maskz_permutex2var_epi32( +pub fn _mm256_maskz_permutex2var_epi32( k: __mmask8, a: __m256i, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); - transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) + unsafe { + let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); + transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
@@ -21946,14 +23228,16 @@ pub unsafe fn _mm256_maskz_permutex2var_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2d))] -pub unsafe fn _mm256_mask2_permutex2var_epi32( +pub fn _mm256_mask2_permutex2var_epi32( a: __m256i, idx: __m256i, k: __mmask8, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); - transmute(simd_select_bitmask(k, permute, idx.as_i32x8())) + unsafe { + let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); + transmute(simd_select_bitmask(k, permute, idx.as_i32x8())) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -21963,8 +23247,8 @@ pub unsafe fn _mm256_mask2_permutex2var_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d -pub unsafe fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { - transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) +pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -21974,14 +23258,11 @@ pub unsafe fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2d))] -pub unsafe fn _mm_mask_permutex2var_epi32( - a: __m128i, - k: __mmask8, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); - transmute(simd_select_bitmask(k, permute, a.as_i32x4())) +pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); + transmute(simd_select_bitmask(k, permute, a.as_i32x4())) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -21991,14 +23272,11 @@ pub unsafe fn _mm_mask_permutex2var_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d -pub unsafe fn _mm_maskz_permutex2var_epi32( - k: __mmask8, - a: __m128i, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); - transmute(simd_select_bitmask(k, permute, i32x4::ZERO)) +pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); + transmute(simd_select_bitmask(k, permute, i32x4::ZERO)) + } } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
@@ -22008,14 +23286,11 @@ pub unsafe fn _mm_maskz_permutex2var_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2d))] -pub unsafe fn _mm_mask2_permutex2var_epi32( - a: __m128i, - idx: __m128i, - k: __mmask8, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); - transmute(simd_select_bitmask(k, permute, idx.as_i32x4())) +pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); + transmute(simd_select_bitmask(k, permute, idx.as_i32x4())) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22025,8 +23300,8 @@ pub unsafe fn _mm_mask2_permutex2var_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q -pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { - transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) +pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22036,14 +23311,16 @@ pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2q))] -pub unsafe fn _mm512_mask_permutex2var_epi64( +pub fn _mm512_mask_permutex2var_epi64( a: __m512i, k: __mmask8, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); - transmute(simd_select_bitmask(k, permute, a.as_i64x8())) + unsafe { + let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); + transmute(simd_select_bitmask(k, permute, a.as_i64x8())) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22053,14 +23330,16 @@ pub unsafe fn _mm512_mask_permutex2var_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q -pub unsafe fn _mm512_maskz_permutex2var_epi64( +pub fn _mm512_maskz_permutex2var_epi64( k: __mmask8, a: __m512i, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); - transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) + unsafe { + let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); + transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
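// Editorial sketch, not part of the patch: the three masked spellings of
// `permutex2var` differ only in what fills lanes whose mask bit is clear:
// `mask_` copies from `a`, `maskz_` writes zero, and `mask2_` copies from
// `idx` (bit-cast to the destination element type in the ps/pd forms, as the
// `castsi*` calls further below show). Scalar model of
// `_mm_mask2_permutex2var_epi32`, whose four lanes use index bits 1:0 for the
// element and bit 2 for the source:
fn mask2_permutex2var_epi32_model(a: [i32; 4], idx: [u32; 4], k: u8, b: [i32; 4]) -> [i32; 4] {
    let mut dst = [0i32; 4];
    for i in 0..4 {
        let table = if idx[i] & 0b100 != 0 { &b } else { &a };
        let permuted = table[(idx[i] & 0b11) as usize];
        dst[i] = if (k >> i) & 1 == 1 { permuted } else { idx[i] as i32 };
    }
    dst
}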
@@ -22070,14 +23349,16 @@ pub unsafe fn _mm512_maskz_permutex2var_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2q))] -pub unsafe fn _mm512_mask2_permutex2var_epi64( +pub fn _mm512_mask2_permutex2var_epi64( a: __m512i, idx: __m512i, k: __mmask8, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); - transmute(simd_select_bitmask(k, permute, idx.as_i64x8())) + unsafe { + let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); + transmute(simd_select_bitmask(k, permute, idx.as_i64x8())) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22087,8 +23368,8 @@ pub unsafe fn _mm512_mask2_permutex2var_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q -pub unsafe fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { - transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) +pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22098,14 +23379,16 @@ pub unsafe fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2q))] -pub unsafe fn _mm256_mask_permutex2var_epi64( +pub fn _mm256_mask_permutex2var_epi64( a: __m256i, k: __mmask8, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); - transmute(simd_select_bitmask(k, permute, a.as_i64x4())) + unsafe { + let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); + transmute(simd_select_bitmask(k, permute, a.as_i64x4())) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22115,14 +23398,16 @@ pub unsafe fn _mm256_mask_permutex2var_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q -pub unsafe fn _mm256_maskz_permutex2var_epi64( +pub fn _mm256_maskz_permutex2var_epi64( k: __mmask8, a: __m256i, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); - transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) + unsafe { + let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); + transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
@@ -22132,14 +23417,16 @@ pub unsafe fn _mm256_maskz_permutex2var_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2q))] -pub unsafe fn _mm256_mask2_permutex2var_epi64( +pub fn _mm256_mask2_permutex2var_epi64( a: __m256i, idx: __m256i, k: __mmask8, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); - transmute(simd_select_bitmask(k, permute, idx.as_i64x4())) + unsafe { + let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); + transmute(simd_select_bitmask(k, permute, idx.as_i64x4())) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22149,8 +23436,8 @@ pub unsafe fn _mm256_mask2_permutex2var_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q -pub unsafe fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { - transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) +pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22160,14 +23447,11 @@ pub unsafe fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2q))] -pub unsafe fn _mm_mask_permutex2var_epi64( - a: __m128i, - k: __mmask8, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); - transmute(simd_select_bitmask(k, permute, a.as_i64x2())) +pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); + transmute(simd_select_bitmask(k, permute, a.as_i64x2())) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22177,14 +23461,11 @@ pub unsafe fn _mm_mask_permutex2var_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q -pub unsafe fn _mm_maskz_permutex2var_epi64( - k: __mmask8, - a: __m128i, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); - transmute(simd_select_bitmask(k, permute, i64x2::ZERO)) +pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); + transmute(simd_select_bitmask(k, permute, i64x2::ZERO)) + } } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
@@ -22194,14 +23475,11 @@ pub unsafe fn _mm_maskz_permutex2var_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2q))] -pub unsafe fn _mm_mask2_permutex2var_epi64( - a: __m128i, - idx: __m128i, - k: __mmask8, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); - transmute(simd_select_bitmask(k, permute, idx.as_i64x2())) +pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); + transmute(simd_select_bitmask(k, permute, idx.as_i64x2())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22211,8 +23489,8 @@ pub unsafe fn _mm_mask2_permutex2var_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps -pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 { - transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) +pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 { + unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22222,14 +23500,11 @@ pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2ps))] -pub unsafe fn _mm512_mask_permutex2var_ps( - a: __m512, - k: __mmask16, - idx: __m512i, - b: __m512, -) -> __m512 { - let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); - transmute(simd_select_bitmask(k, permute, a.as_f32x16())) +pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, a.as_f32x16())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
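// Editorial sketch, not part of the patch: a concrete two-source shuffle built
// on the now-safe `_mm512_permutex2var_ps`. Indices 0..=15 select from `a` and
// 16..=31 select from `b`, so the index vector below interleaves the low eight
// elements of both inputs. The helper name is illustrative only.
#[target_feature(enable = "avx512f")]
fn interleave_low_halves(a: __m512, b: __m512) -> __m512 {
    let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    _mm512_permutex2var_ps(a, idx, b)
}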
@@ -22239,14 +23514,11 @@ pub unsafe fn _mm512_mask_permutex2var_ps( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps -pub unsafe fn _mm512_maskz_permutex2var_ps( - k: __mmask16, - a: __m512, - idx: __m512i, - b: __m512, -) -> __m512 { - let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); - transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) +pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -22256,15 +23528,12 @@ pub unsafe fn _mm512_maskz_permutex2var_ps( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps -pub unsafe fn _mm512_mask2_permutex2var_ps( - a: __m512, - idx: __m512i, - k: __mmask16, - b: __m512, -) -> __m512 { - let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); - let idx = _mm512_castsi512_ps(idx).as_f32x16(); - transmute(simd_select_bitmask(k, permute, idx)) +pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 { + unsafe { + let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); + let idx = _mm512_castsi512_ps(idx).as_f32x16(); + transmute(simd_select_bitmask(k, permute, idx)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22274,8 +23543,8 @@ pub unsafe fn _mm512_mask2_permutex2var_ps( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps -pub unsafe fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 { - transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) +pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 { + unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -22285,14 +23554,11 @@ pub unsafe fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2ps))] -pub unsafe fn _mm256_mask_permutex2var_ps( - a: __m256, - k: __mmask8, - idx: __m256i, - b: __m256, -) -> __m256 { - let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); - transmute(simd_select_bitmask(k, permute, a.as_f32x8())) +pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, a.as_f32x8())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22302,14 +23568,11 @@ pub unsafe fn _mm256_mask_permutex2var_ps( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps -pub unsafe fn _mm256_maskz_permutex2var_ps( - k: __mmask8, - a: __m256, - idx: __m256i, - b: __m256, -) -> __m256 { - let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); - transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) +pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -22319,15 +23582,12 @@ pub unsafe fn _mm256_maskz_permutex2var_ps( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps -pub unsafe fn _mm256_mask2_permutex2var_ps( - a: __m256, - idx: __m256i, - k: __mmask8, - b: __m256, -) -> __m256 { - let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); - let idx = _mm256_castsi256_ps(idx).as_f32x8(); - transmute(simd_select_bitmask(k, permute, idx)) +pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 { + unsafe { + let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); + let idx = _mm256_castsi256_ps(idx).as_f32x8(); + transmute(simd_select_bitmask(k, permute, idx)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
@@ -22337,8 +23597,8 @@ pub unsafe fn _mm256_mask2_permutex2var_ps( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps -pub unsafe fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 { - transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) +pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 { + unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22348,9 +23608,11 @@ pub unsafe fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2ps))] -pub unsafe fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 { - let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); - transmute(simd_select_bitmask(k, permute, a.as_f32x4())) +pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 { + unsafe { + let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, a.as_f32x4())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22360,9 +23622,11 @@ pub unsafe fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps -pub unsafe fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 { - let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); - transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) +pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 { + unsafe { + let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); + transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
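// Editorial sketch, not part of the patch: in the 128-bit ps form the combined
// table has eight entries, so index bits 1:0 pick the element and bit 2 picks
// the source register. The illustrative helper below shifts one element in
// from `b`, yielding [a1, a2, a3, b0]; under the target_feature_11 rules it
// needs no `unsafe` block, assuming the baseline SSE2 feature is enabled as
// usual for the `_mm_setr_epi32` call.
#[target_feature(enable = "avx512f,avx512vl")]
fn shift_in_from_b(a: __m128, b: __m128) -> __m128 {
    // Conceptually concatenate [a0 a1 a2 a3 | b0 b1 b2 b3] and take entries 1..=4.
    let idx = _mm_setr_epi32(1, 2, 3, 4);
    _mm_permutex2var_ps(a, idx, b)
}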
@@ -22372,10 +23636,12 @@ pub unsafe fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps -pub unsafe fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 { - let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); - let idx = _mm_castsi128_ps(idx).as_f32x4(); - transmute(simd_select_bitmask(k, permute, idx)) +pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 { + unsafe { + let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); + let idx = _mm_castsi128_ps(idx).as_f32x4(); + transmute(simd_select_bitmask(k, permute, idx)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22385,8 +23651,8 @@ pub unsafe fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd -pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d { - transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) +pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d { + unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22396,14 +23662,11 @@ pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2pd))] -pub unsafe fn _mm512_mask_permutex2var_pd( - a: __m512d, - k: __mmask8, - idx: __m512i, - b: __m512d, -) -> __m512d { - let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); - transmute(simd_select_bitmask(k, permute, a.as_f64x8())) +pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, a.as_f64x8())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -22413,14 +23676,11 @@ pub unsafe fn _mm512_mask_permutex2var_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd -pub unsafe fn _mm512_maskz_permutex2var_pd( - k: __mmask8, - a: __m512d, - idx: __m512i, - b: __m512d, -) -> __m512d { - let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); - transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) +pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) @@ -22430,15 +23690,12 @@ pub unsafe fn _mm512_maskz_permutex2var_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd -pub unsafe fn _mm512_mask2_permutex2var_pd( - a: __m512d, - idx: __m512i, - k: __mmask8, - b: __m512d, -) -> __m512d { - let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); - let idx = _mm512_castsi512_pd(idx).as_f64x8(); - transmute(simd_select_bitmask(k, permute, idx)) +pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d { + unsafe { + let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); + let idx = _mm512_castsi512_pd(idx).as_f64x8(); + transmute(simd_select_bitmask(k, permute, idx)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22448,8 +23705,8 @@ pub unsafe fn _mm512_mask2_permutex2var_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd -pub unsafe fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d { - transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) +pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d { + unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
@@ -22459,14 +23716,11 @@ pub unsafe fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2pd))] -pub unsafe fn _mm256_mask_permutex2var_pd( - a: __m256d, - k: __mmask8, - idx: __m256i, - b: __m256d, -) -> __m256d { - let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); - transmute(simd_select_bitmask(k, permute, a.as_f64x4())) +pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, a.as_f64x4())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22476,14 +23730,11 @@ pub unsafe fn _mm256_mask_permutex2var_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd -pub unsafe fn _mm256_maskz_permutex2var_pd( - k: __mmask8, - a: __m256d, - idx: __m256i, - b: __m256d, -) -> __m256d { - let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); - transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) +pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) @@ -22493,15 +23744,12 @@ pub unsafe fn _mm256_maskz_permutex2var_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd -pub unsafe fn _mm256_mask2_permutex2var_pd( - a: __m256d, - idx: __m256i, - k: __mmask8, - b: __m256d, -) -> __m256d { - let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); - let idx = _mm256_castsi256_pd(idx).as_f64x4(); - transmute(simd_select_bitmask(k, permute, idx)) +pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d { + unsafe { + let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); + let idx = _mm256_castsi256_pd(idx).as_f64x4(); + transmute(simd_select_bitmask(k, permute, idx)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
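// Editor's note (not part of the patch): the `mask2` variants differ from the plain masked
// ones only in their fallback: lanes whose mask bit is clear receive the raw bits of `idx`
// reinterpreted as doubles (the `_mm256_castsi256_pd(idx)` above is a lane-wise bit-cast).
// A scalar sketch of that fallback for the 4-lane case (illustrative names):
fn mask2_fallback4(idx: [i64; 4]) -> [f64; 4] {
    // bit-cast each 64-bit index to f64, exactly what the cast intrinsic does per lane
    idx.map(|i| f64::from_bits(i as u64))
}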
@@ -22511,8 +23759,8 @@ pub unsafe fn _mm256_mask2_permutex2var_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd -pub unsafe fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d { - transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) +pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d { + unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22522,14 +23770,11 @@ pub unsafe fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2pd))] -pub unsafe fn _mm_mask_permutex2var_pd( - a: __m128d, - k: __mmask8, - idx: __m128i, - b: __m128d, -) -> __m128d { - let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); - transmute(simd_select_bitmask(k, permute, a.as_f64x2())) +pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d { + unsafe { + let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, a.as_f64x2())) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -22539,14 +23784,11 @@ pub unsafe fn _mm_mask_permutex2var_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd -pub unsafe fn _mm_maskz_permutex2var_pd( - k: __mmask8, - a: __m128d, - idx: __m128i, - b: __m128d, -) -> __m128d { - let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); - transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) +pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d { + unsafe { + let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); + transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) @@ -22556,15 +23798,12 @@ pub unsafe fn _mm_maskz_permutex2var_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd -pub unsafe fn _mm_mask2_permutex2var_pd( - a: __m128d, - idx: __m128i, - k: __mmask8, - b: __m128d, -) -> __m128d { - let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); - let idx = _mm_castsi128_pd(idx).as_f64x2(); - transmute(simd_select_bitmask(k, permute, idx)) +pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d { + unsafe { + let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); + let idx = _mm_castsi128_pd(idx).as_f64x2(); + transmute(simd_select_bitmask(k, permute, idx)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. 
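// Editor's note (not part of the patch): the permutex2var family treats `a` and `b` as one
// concatenated table. For an N-lane vector, the low log2(N) bits of each index pick the
// element and the next bit picks the source (0 = a, 1 = b). A scalar model of the 2-lane
// `_mm_permutex2var_pd` above, mirroring Intel's pseudocode (illustrative):
fn permutex2var_pd2(a: [f64; 2], idx: [i64; 2], b: [f64; 2]) -> [f64; 2] {
    let pick = |i: i64| {
        let elem = (i & 0b1) as usize;                        // low bit: element within the source
        if (i >> 1) & 0b1 == 0 { a[elem] } else { b[elem] }   // next bit: a or b
    };
    [pick(idx[0]), pick(idx[1])]
}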
@@ -22575,31 +23814,33 @@ pub unsafe fn _mm_mask2_permutex2var_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r: i32x16 = simd_shuffle!( - a.as_i32x16(), - a.as_i32x16(), - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - (MASK as u32 >> 4) & 0b11, - (MASK as u32 >> 6) & 0b11, - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 4, - ((MASK as u32 >> 6) & 0b11) + 4, - (MASK as u32 & 0b11) + 8, - ((MASK as u32 >> 2) & 0b11) + 8, - ((MASK as u32 >> 4) & 0b11) + 8, - ((MASK as u32 >> 6) & 0b11) + 8, - (MASK as u32 & 0b11) + 12, - ((MASK as u32 >> 2) & 0b11) + 12, - ((MASK as u32 >> 4) & 0b11) + 12, - ((MASK as u32 >> 6) & 0b11) + 12, - ], - ); - transmute(r) +pub fn _mm512_shuffle_epi32(a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r: i32x16 = simd_shuffle!( + a.as_i32x16(), + a.as_i32x16(), + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + (MASK as u32 >> 4) & 0b11, + (MASK as u32 >> 6) & 0b11, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 8, + ((MASK as u32 >> 6) & 0b11) + 8, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 12, + ((MASK as u32 >> 6) & 0b11) + 12, + ], + ); + transmute(r) + } } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22610,14 +23851,16 @@ pub unsafe fn _mm512_shuffle_epi32(a: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_shuffle_epi32( +pub fn _mm512_mask_shuffle_epi32( src: __m512i, k: __mmask16, a: __m512i, ) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -22628,13 +23871,12 @@ pub unsafe fn _mm512_mask_shuffle_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_shuffle_epi32( - k: __mmask16, - a: __m512i, -) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) +pub fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22645,14 +23887,16 @@ pub unsafe fn _mm512_maskz_shuffle_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_shuffle_epi32( +pub fn _mm256_mask_shuffle_epi32( src: __m256i, k: __mmask8, a: __m256i, ) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + } } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22663,13 +23907,12 @@ pub unsafe fn _mm256_mask_shuffle_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_shuffle_epi32( - k: __mmask8, - a: __m256i, -) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) +pub fn _mm256_maskz_shuffle_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) + } } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22680,14 +23923,16 @@ pub unsafe fn _mm256_maskz_shuffle_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_mask_shuffle_epi32( +pub fn _mm_mask_shuffle_epi32( src: __m128i, k: __mmask8, a: __m128i, ) -> __m128i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm_shuffle_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
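// Editor's note (not part of the patch): the 8-bit MASK of `_mm512_shuffle_epi32` encodes
// four 2-bit element selectors that are applied independently inside each 128-bit lane.
// This scalar sketch reproduces the index table built by the hunk above (illustrative):
fn shuffle_epi32_indices(mask: u8) -> [u32; 16] {
    let sel = |n: u32| ((mask as u32) >> (2 * n)) & 0b11; // four 2-bit selectors
    let mut out = [0u32; 16];
    for lane in 0..4u32 {
        for j in 0..4u32 {
            out[(lane * 4 + j) as usize] = sel(j) + lane * 4; // same pattern, shifted per lane
        }
    }
    out
}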
@@ -22698,13 +23943,12 @@ pub unsafe fn _mm_mask_shuffle_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_maskz_shuffle_epi32( - k: __mmask8, - a: __m128i, -) -> __m128i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm_shuffle_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) +pub fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. @@ -22715,30 +23959,32 @@ pub unsafe fn _mm_maskz_shuffle_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - b, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11) + 16, - ((MASK as u32 >> 6) & 0b11) + 16, - (MASK as u32 & 0b11) + 4, - ((MASK as u32 >> 2) & 0b11) + 4, - ((MASK as u32 >> 4) & 0b11) + 20, - ((MASK as u32 >> 6) & 0b11) + 20, - (MASK as u32 & 0b11) + 8, - ((MASK as u32 >> 2) & 0b11) + 8, - ((MASK as u32 >> 4) & 0b11) + 24, - ((MASK as u32 >> 6) & 0b11) + 24, - (MASK as u32 & 0b11) + 12, - ((MASK as u32 >> 2) & 0b11) + 12, - ((MASK as u32 >> 4) & 0b11) + 28, - ((MASK as u32 >> 6) & 0b11) + 28, - ], - ) +pub fn _mm512_shuffle_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 16, + ((MASK as u32 >> 6) & 0b11) + 16, + (MASK as u32 & 0b11) + 4, + ((MASK as u32 >> 2) & 0b11) + 4, + ((MASK as u32 >> 4) & 0b11) + 20, + ((MASK as u32 >> 6) & 0b11) + 20, + (MASK as u32 & 0b11) + 8, + ((MASK as u32 >> 2) & 0b11) + 8, + ((MASK as u32 >> 4) & 0b11) + 24, + ((MASK as u32 >> 6) & 0b11) + 24, + (MASK as u32 & 0b11) + 12, + ((MASK as u32 >> 2) & 0b11) + 12, + ((MASK as u32 >> 4) & 0b11) + 28, + ((MASK as u32 >> 6) & 0b11) + 28, + ], + ) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22749,15 +23995,17 @@ pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512) -> __m512 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shuffle_ps( +pub fn _mm512_mask_shuffle_ps( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_ps::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -22767,15 +24015,13 @@ pub unsafe fn _mm512_mask_shuffle_ps( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shuffle_ps( - k: __mmask16, - a: __m512, - b: __m512, -) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_ps::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) +#[rustc_legacy_const_generics(3)] +pub fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22786,15 +24032,17 @@ pub unsafe fn _mm512_maskz_shuffle_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shuffle_ps( +pub fn _mm256_mask_shuffle_ps( src: __m256, k: __mmask8, a: __m256, b: __m256, ) -> __m256 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_ps::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22805,14 +24053,12 @@ pub unsafe fn _mm256_mask_shuffle_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shuffle_ps( - k: __mmask8, - a: __m256, - b: __m256, -) -> __m256 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_ps::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_shuffle_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } } /// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
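// Editor's note (not part of the patch): `_mm512_shuffle_ps` fills the first two destination
// elements of each 128-bit lane from `a` and the last two from `b` (hence the +16 offsets in
// the concatenated shuffle above). Scalar sketch of the index table (illustrative):
fn shuffle_ps_indices(mask: u8) -> [u32; 16] {
    let sel = |n: u32| ((mask as u32) >> (2 * n)) & 0b11;
    let mut out = [0u32; 16];
    for lane in 0..4u32 {
        out[(lane * 4) as usize] = sel(0) + lane * 4;          // from a
        out[(lane * 4 + 1) as usize] = sel(1) + lane * 4;      // from a
        out[(lane * 4 + 2) as usize] = sel(2) + lane * 4 + 16; // from b
        out[(lane * 4 + 3) as usize] = sel(3) + lane * 4 + 16; // from b
    }
    out
}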
@@ -22823,15 +24069,17 @@ pub unsafe fn _mm256_maskz_shuffle_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shuffle_ps( +pub fn _mm_mask_shuffle_ps( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm_shuffle_ps::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } } /// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22842,10 +24090,12 @@ pub unsafe fn _mm_mask_shuffle_ps( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufps, MASK = 3))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm_shuffle_ps::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_ps::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst. @@ -22856,22 +24106,24 @@ pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - b, - [ - MASK as u32 & 0b1, - ((MASK as u32 >> 1) & 0b1) + 8, - ((MASK as u32 >> 2) & 0b1) + 2, - ((MASK as u32 >> 3) & 0b1) + 10, - ((MASK as u32 >> 4) & 0b1) + 4, - ((MASK as u32 >> 5) & 0b1) + 12, - ((MASK as u32 >> 6) & 0b1) + 6, - ((MASK as u32 >> 7) & 0b1) + 14, - ], - ) +pub fn _mm512_shuffle_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b1, + ((MASK as u32 >> 1) & 0b1) + 8, + ((MASK as u32 >> 2) & 0b1) + 2, + ((MASK as u32 >> 3) & 0b1) + 10, + ((MASK as u32 >> 4) & 0b1) + 4, + ((MASK as u32 >> 5) & 0b1) + 12, + ((MASK as u32 >> 6) & 0b1) + 6, + ((MASK as u32 >> 7) & 0b1) + 14, + ], + ) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -22882,15 +24134,17 @@ pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d) -> __m5 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shuffle_pd( +pub fn _mm512_mask_shuffle_pd( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_pd::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22901,14 +24155,12 @@ pub unsafe fn _mm512_mask_shuffle_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shuffle_pd( - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_pd::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) +pub fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22919,15 +24171,17 @@ pub unsafe fn _mm512_maskz_shuffle_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shuffle_pd( +pub fn _mm256_mask_shuffle_pd( src: __m256d, k: __mmask8, a: __m256d, b: __m256d, ) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_pd::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
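// Editor's note (not part of the patch): `_mm512_shuffle_pd` spends one MASK bit per
// destination element; even positions come from `a`, odd positions from `b` (+8 offset),
// within each 128-bit lane. Scalar sketch of the index table used above (illustrative):
fn shuffle_pd_indices(mask: u8) -> [u32; 8] {
    let bit = |n: u32| ((mask as u32) >> n) & 0b1;
    let mut out = [0u32; 8];
    for lane in 0..4u32 {
        out[(lane * 2) as usize] = bit(lane * 2) + lane * 2;             // from a
        out[(lane * 2 + 1) as usize] = bit(lane * 2 + 1) + lane * 2 + 8; // from b
    }
    out
}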
@@ -22938,14 +24192,12 @@ pub unsafe fn _mm256_mask_shuffle_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shuffle_pd( - k: __mmask8, - a: __m256d, - b: __m256d, -) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_pd::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) +pub fn _mm256_maskz_shuffle_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22956,15 +24208,17 @@ pub unsafe fn _mm256_maskz_shuffle_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shuffle_pd( +pub fn _mm_mask_shuffle_pd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm_shuffle_pd::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2())) + } } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -22975,14 +24229,12 @@ pub unsafe fn _mm_mask_shuffle_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufpd, MASK = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shuffle_pd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm_shuffle_pd::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) +pub fn _mm_maskz_shuffle_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm_shuffle_pd::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) + } } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst. 
@@ -22993,33 +24245,35 @@ pub unsafe fn _mm_maskz_shuffle_pd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let r: i32x16 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b11) * 4 + 0, - (MASK as u32 & 0b11) * 4 + 1, - (MASK as u32 & 0b11) * 4 + 2, - (MASK as u32 & 0b11) * 4 + 3, - ((MASK as u32 >> 2) & 0b11) * 4 + 0, - ((MASK as u32 >> 2) & 0b11) * 4 + 1, - ((MASK as u32 >> 2) & 0b11) * 4 + 2, - ((MASK as u32 >> 2) & 0b11) * 4 + 3, - ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, - ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, - ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, - ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, - ], - ); - transmute(r) +pub fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let r: i32x16 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23030,15 +24284,17 @@ pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shuffle_i32x4( +pub fn _mm512_mask_shuffle_i32x4( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_i32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -23049,14 +24305,16 @@ pub unsafe fn _mm512_mask_shuffle_i32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shuffle_i32x4( +pub fn _mm512_maskz_shuffle_i32x4( k: __mmask16, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_i32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -23067,25 +24325,27 @@ pub unsafe fn _mm512_maskz_shuffle_i32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let r: i32x8 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b1) * 4 + 0, - (MASK as u32 & 0b1) * 4 + 1, - (MASK as u32 & 0b1) * 4 + 2, - (MASK as u32 & 0b1) * 4 + 3, - ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, - ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, - ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, - ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, - ], - ); - transmute(r) +pub fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r: i32x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23096,15 +24356,17 @@ pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shuffle_i32x4( +pub fn _mm256_mask_shuffle_i32x4( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_i32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + } } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
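// Editor's note (not part of the patch): the x4/x2 shuffles move whole 128-bit lanes. For
// `_mm512_shuffle_i32x4`, the two low selector pairs pick lanes of `a` and the two high ones
// pick lanes of `b` (+16 in the concatenated input). Scalar sketch of the index table:
fn shuffle_i32x4_indices(mask: u8) -> [u32; 16] {
    let lane_sel = |n: u32| ((mask as u32) >> (2 * n)) & 0b11; // which 128-bit source lane
    let mut out = [0u32; 16];
    for d in 0..4u32 {
        let from_b = if d >= 2 { 16 } else { 0 }; // destination lanes 2 and 3 read from b
        for j in 0..4u32 {
            out[(d * 4 + j) as usize] = lane_sel(d) * 4 + j + from_b;
        }
    }
    out
}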
@@ -23115,14 +24377,12 @@ pub unsafe fn _mm256_mask_shuffle_i32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shuffle_i32x4( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_i32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) +pub fn _mm256_maskz_shuffle_i32x4(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) + } } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -23133,25 +24393,27 @@ pub unsafe fn _mm256_maskz_shuffle_i32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_i64x8(); - let b = b.as_i64x8(); - let r: i64x8 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b11) * 2 + 0, - (MASK as u32 & 0b11) * 2 + 1, - ((MASK as u32 >> 2) & 0b11) * 2 + 0, - ((MASK as u32 >> 2) & 0b11) * 2 + 1, - ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, - ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, - ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, - ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, - ], - ); - transmute(r) +pub fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r: i64x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23162,15 +24424,17 @@ pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shuffle_i64x2( +pub fn _mm512_mask_shuffle_i64x2( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_i64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + } } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -23181,14 +24445,12 @@ pub unsafe fn _mm512_mask_shuffle_i64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shuffle_i64x2( - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_i64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) +pub fn _mm512_maskz_shuffle_i64x2(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) + } } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -23199,21 +24461,23 @@ pub unsafe fn _mm512_maskz_shuffle_i64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_i64x4(); - let b = b.as_i64x4(); - let r: i64x4 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b1) * 2 + 0, - (MASK as u32 & 0b1) * 2 + 1, - ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, - ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, - ], - ); - transmute(r) +pub fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let r: i64x4 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23224,15 +24488,17 @@ pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shuffle_i64x2( +pub fn _mm256_mask_shuffle_i64x2( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_i64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
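// Editor's note (not part of the patch): in the 256-bit forms there are only two 128-bit
// lanes, so each selector shrinks to a single bit: bit 0 picks the lane taken from `a`,
// bit 1 the lane taken from `b` (+4 in the concatenated input). Sketch for the
// `_mm256_shuffle_i64x2` hunk above (illustrative):
fn shuffle_i64x2_256_indices(mask: u8) -> [u32; 4] {
    let lane_a = (mask as u32) & 0b1;
    let lane_b = ((mask as u32) >> 1) & 0b1;
    [lane_a * 2, lane_a * 2 + 1, lane_b * 2 + 4, lane_b * 2 + 5]
}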
@@ -23243,14 +24509,12 @@ pub unsafe fn _mm256_mask_shuffle_i64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shuffle_i64x2( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_i64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) +pub fn _mm256_maskz_shuffle_i64x2(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_i64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -23261,33 +24525,35 @@ pub unsafe fn _mm256_maskz_shuffle_i64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r: f32x16 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b11) * 4 + 0, - (MASK as u32 & 0b11) * 4 + 1, - (MASK as u32 & 0b11) * 4 + 2, - (MASK as u32 & 0b11) * 4 + 3, - ((MASK as u32 >> 2) & 0b11) * 4 + 0, - ((MASK as u32 >> 2) & 0b11) * 4 + 1, - ((MASK as u32 >> 2) & 0b11) * 4 + 2, - ((MASK as u32 >> 2) & 0b11) * 4 + 3, - ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, - ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, - ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, - ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, - ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, - ], - ); - transmute(r) +pub fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r: f32x16 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 4 + 0, + (MASK as u32 & 0b11) * 4 + 1, + (MASK as u32 & 0b11) * 4 + 2, + (MASK as u32 & 0b11) * 4 + 3, + ((MASK as u32 >> 2) & 0b11) * 4 + 0, + ((MASK as u32 >> 2) & 0b11) * 4 + 1, + ((MASK as u32 >> 2) & 0b11) * 4 + 2, + ((MASK as u32 >> 2) & 0b11) * 4 + 3, + ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16, + ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -23298,15 +24564,17 @@ pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shuffle_f32x4( +pub fn _mm512_mask_shuffle_f32x4( src: __m512, k: __mmask16, a: __m512, b: __m512, ) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_f32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23317,14 +24585,12 @@ pub unsafe fn _mm512_mask_shuffle_f32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shuffle_f32x4( - k: __mmask16, - a: __m512, - b: __m512, -) -> __m512 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_f32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) +pub fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -23335,25 +24601,27 @@ pub unsafe fn _mm512_maskz_shuffle_f32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m256 { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let r: f32x8 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b1) * 4 + 0, - (MASK as u32 & 0b1) * 4 + 1, - (MASK as u32 & 0b1) * 4 + 2, - (MASK as u32 & 0b1) * 4 + 3, - ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, - ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, - ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, - ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, - ], - ); - transmute(r) +pub fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let r: f32x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 4 + 0, + (MASK as u32 & 0b1) * 4 + 1, + (MASK as u32 & 0b1) * 4 + 2, + (MASK as u32 & 0b1) * 4 + 3, + ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8, + ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -23364,15 +24632,17 @@ pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shuffle_f32x4( +pub fn _mm256_mask_shuffle_f32x4( src: __m256, k: __mmask8, a: __m256, b: __m256, ) -> __m256 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_f32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23383,14 +24653,12 @@ pub unsafe fn _mm256_mask_shuffle_f32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shuffle_f32x4( - k: __mmask8, - a: __m256, - b: __m256, -) -> __m256 { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_f32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_shuffle_f32x4(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -23401,25 +24669,27 @@ pub unsafe fn _mm256_maskz_shuffle_f32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r: f64x8 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b11) * 2 + 0, - (MASK as u32 & 0b11) * 2 + 1, - ((MASK as u32 >> 2) & 0b11) * 2 + 0, - ((MASK as u32 >> 2) & 0b11) * 2 + 1, - ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, - ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, - ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, - ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, - ], - ); - transmute(r) +pub fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r: f64x8 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b11) * 2 + 0, + (MASK as u32 & 0b11) * 2 + 1, + ((MASK as u32 >> 2) & 0b11) * 2 + 0, + ((MASK as u32 >> 2) & 0b11) * 2 + 1, + ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8, + ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -23430,15 +24700,17 @@ pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shuffle_f64x2( +pub fn _mm512_mask_shuffle_f64x2( src: __m512d, k: __mmask8, a: __m512d, b: __m512d, ) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_f64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23449,14 +24721,12 @@ pub unsafe fn _mm512_mask_shuffle_f64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shuffle_f64x2( - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm512_shuffle_f64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) +pub fn _mm512_maskz_shuffle_f64x2(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm512_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -23467,21 +24737,23 @@ pub unsafe fn _mm512_maskz_shuffle_f64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let r: f64x4 = simd_shuffle!( - a, - b, - [ - (MASK as u32 & 0b1) * 2 + 0, - (MASK as u32 & 0b1) * 2 + 1, - ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, - ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, - ], - ); - transmute(r) +pub fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let r: f64x4 = simd_shuffle!( + a, + b, + [ + (MASK as u32 & 0b1) * 2 + 0, + (MASK as u32 & 0b1) * 2 + 1, + ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4, + ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4, + ], + ); + transmute(r) + } } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -23492,15 +24764,17 @@ pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shuffle_f64x2( +pub fn _mm256_mask_shuffle_f64x2( src: __m256d, k: __mmask8, a: __m256d, b: __m256d, ) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_f64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23511,14 +24785,12 @@ pub unsafe fn _mm256_mask_shuffle_f64x2( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shuffle_f64x2( - k: __mmask8, - a: __m256d, - b: __m256d, -) -> __m256d { - static_assert_uimm_bits!(MASK, 8); - let r = _mm256_shuffle_f64x2::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) +pub fn _mm256_maskz_shuffle_f64x2(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + static_assert_uimm_bits!(MASK, 8); + let r = _mm256_shuffle_f64x2::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst. @@ -23532,13 +24804,15 @@ pub unsafe fn _mm256_maskz_shuffle_f64x2( assert_instr(vextractf32x4, IMM8 = 3) )] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_extractf32x4_ps(a: __m512) -> __m128 { - static_assert_uimm_bits!(IMM8, 2); - match IMM8 & 0x3 { - 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]), - 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]), - 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]), - _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]), +pub fn _mm512_extractf32x4_ps(a: __m512) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + match IMM8 & 0x3 { + 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]), + 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]), + 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]), + _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]), + } } } @@ -23553,14 +24827,12 @@ pub unsafe fn _mm512_extractf32x4_ps(a: __m512) -> __m128 { assert_instr(vextractf32x4, IMM8 = 3) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_extractf32x4_ps( - src: __m128, - k: __mmask8, - a: __m512, -) -> __m128 { - static_assert_uimm_bits!(IMM8, 2); - let r = _mm512_extractf32x4_ps::(a); - transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) +pub fn _mm512_mask_extractf32x4_ps(src: __m128, k: __mmask8, a: __m512) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit 
is not set). @@ -23574,10 +24846,12 @@ pub unsafe fn _mm512_mask_extractf32x4_ps( assert_instr(vextractf32x4, IMM8 = 3) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512) -> __m128 { - static_assert_uimm_bits!(IMM8, 2); - let r = _mm512_extractf32x4_ps::(a); - transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) +pub fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst. @@ -23591,11 +24865,13 @@ pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m5 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4 )] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_extractf32x4_ps(a: __m256) -> __m128 { - static_assert_uimm_bits!(IMM8, 1); - match IMM8 & 0x1 { - 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]), - _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]), +pub fn _mm256_extractf32x4_ps(a: __m256) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + match IMM8 & 0x1 { + 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]), + } } } @@ -23610,14 +24886,12 @@ pub unsafe fn _mm256_extractf32x4_ps(a: __m256) -> __m128 { assert_instr(vextractf32x4, IMM8 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_extractf32x4_ps( - src: __m128, - k: __mmask8, - a: __m256, -) -> __m128 { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm256_extractf32x4_ps::(a); - transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) +pub fn _mm256_mask_extractf32x4_ps(src: __m128, k: __mmask8, a: __m256) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4())) + } } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23631,10 +24905,12 @@ pub unsafe fn _mm256_mask_extractf32x4_ps( assert_instr(vextractf32x4, IMM8 = 1) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m256) -> __m128 { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm256_extractf32x4_ps::(a); - transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) +pub fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m256) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_extractf32x4_ps::(a); + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) + } } /// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst. 
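// Editor's note (not part of the patch): the extract family simply copies one aligned chunk
// of the source register: two immediate bits select one of four 128-bit lanes of a __m512,
// a single bit selects one of two lanes of a __m256 (and, below, one bit selects a 256-bit
// half of a __m512i). Scalar sketch for the 512-bit f32 case (illustrative):
fn extractf32x4_from_512(a: [f32; 16], imm8: u8) -> [f32; 4] {
    let lane = (imm8 & 0b11) as usize; // only the low two immediate bits are used
    let mut out = [0.0f32; 4];
    out.copy_from_slice(&a[lane * 4..lane * 4 + 4]);
    out
}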
@@ -23648,11 +24924,13 @@ pub unsafe fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m2 assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4 )] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i) -> __m256i { - static_assert_uimm_bits!(IMM1, 1); - match IMM1 { - 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]), - _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]), +pub fn _mm512_extracti64x4_epi64(a: __m512i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + match IMM1 { + 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]), + } } } @@ -23667,14 +24945,16 @@ pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i) -> __m256i assert_instr(vextracti64x4, IMM1 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_extracti64x4_epi64( +pub fn _mm512_mask_extracti64x4_epi64( src: __m256i, k: __mmask8, a: __m512i, ) -> __m256i { - static_assert_uimm_bits!(IMM1, 1); - let r = _mm512_extracti64x4_epi64::(a); - transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm512_extracti64x4_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } } /// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23688,10 +24968,12 @@ pub unsafe fn _mm512_mask_extracti64x4_epi64( assert_instr(vextracti64x4, IMM1 = 1) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i) -> __m256i { - static_assert_uimm_bits!(IMM1, 1); - let r = _mm512_extracti64x4_epi64::(a); - transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) +pub fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm512_extracti64x4_epi64::(a); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } } /// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst. 
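A minimal sketch of the 64-bit extract converted above, under the same nightly assumptions; the helper name is made up for illustration.

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn split_qwords(a: __m512i) -> (__m256i, __m256i) {
    // IMM1 = 0 is the low 256 bits, IMM1 = 1 the high 256 bits.
    (
        _mm512_extracti64x4_epi64::<0>(a),
        _mm512_extracti64x4_epi64::<1>(a),
    )
}
```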
@@ -23705,11 +24987,13 @@ pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: _ assert_instr(vextractf64x4, IMM8 = 1) )] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_extractf64x4_pd(a: __m512d) -> __m256d { - static_assert_uimm_bits!(IMM8, 1); - match IMM8 & 0x1 { - 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]), - _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]), +pub fn _mm512_extractf64x4_pd(a: __m512d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + match IMM8 & 0x1 { + 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]), + } } } @@ -23724,14 +25008,16 @@ pub unsafe fn _mm512_extractf64x4_pd(a: __m512d) -> __m256d { assert_instr(vextractf64x4, IMM8 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_extractf64x4_pd( +pub fn _mm512_mask_extractf64x4_pd( src: __m256d, k: __mmask8, a: __m512d, ) -> __m256d { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm512_extractf64x4_pd::(a); - transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_extractf64x4_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4())) + } } /// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23745,10 +25031,12 @@ pub unsafe fn _mm512_mask_extractf64x4_pd( assert_instr(vextractf64x4, IMM8 = 1) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d) -> __m256d { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm512_extractf64x4_pd::(a); - transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) +pub fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d) -> __m256d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_extractf64x4_pd::(a); + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) + } } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst. @@ -23762,17 +25050,19 @@ pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m5 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4 )] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i) -> __m128i { - static_assert_uimm_bits!(IMM2, 2); - let a = a.as_i32x16(); - let zero = i32x16::ZERO; - let extract: i32x4 = match IMM2 { - 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]), - 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]), - _ => simd_shuffle!(a, zero, [12, 13, 14, 15]), - }; - transmute(extract) +pub fn _mm512_extracti32x4_epi32(a: __m512i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let a = a.as_i32x16(); + let zero = i32x16::ZERO; + let extract: i32x4 = match IMM2 { + 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]), + 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]), + _ => simd_shuffle!(a, zero, [12, 13, 14, 15]), + }; + transmute(extract) + } } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
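Sketch of the writemask form converted above (same nightly assumptions, hypothetical helper name): lanes whose mask bit is clear are copied from `src` rather than from the extracted half.

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn blend_high_half(src: __m256d, a: __m512d) -> __m256d {
    // Keep extracted lanes 0 and 1; take lanes 2 and 3 from `src`.
    _mm512_mask_extractf64x4_pd::<1>(src, 0b0011, a)
}
```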
@@ -23786,14 +25076,16 @@ pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i) -> __m128i assert_instr(vextracti32x4, IMM2 = 3) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_extracti32x4_epi32( +pub fn _mm512_mask_extracti32x4_epi32( src: __m128i, k: __mmask8, a: __m512i, ) -> __m128i { - static_assert_uimm_bits!(IMM2, 2); - let r = _mm512_extracti32x4_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm512_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23807,10 +25099,12 @@ pub unsafe fn _mm512_mask_extracti32x4_epi32( assert_instr(vextracti32x4, IMM2 = 3) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i) -> __m128i { - static_assert_uimm_bits!(IMM2, 2); - let r = _mm512_extracti32x4_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) +pub fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM2, 2); + let r = _mm512_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst. @@ -23824,15 +25118,17 @@ pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: _ assert_instr(vextract, IMM1 = 1) //should be vextracti32x4 )] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i) -> __m128i { - static_assert_uimm_bits!(IMM1, 1); - let a = a.as_i32x8(); - let zero = i32x8::ZERO; - let extract: i32x4 = match IMM1 { - 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), - _ => simd_shuffle!(a, zero, [4, 5, 6, 7]), - }; - transmute(extract) +pub fn _mm256_extracti32x4_epi32(a: __m256i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let a = a.as_i32x8(); + let zero = i32x8::ZERO; + let extract: i32x4 = match IMM1 { + 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), + _ => simd_shuffle!(a, zero, [4, 5, 6, 7]), + }; + transmute(extract) + } } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23846,14 +25142,16 @@ pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i) -> __m128i assert_instr(vextracti32x4, IMM1 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_extracti32x4_epi32( +pub fn _mm256_mask_extracti32x4_epi32( src: __m128i, k: __mmask8, a: __m256i, ) -> __m128i { - static_assert_uimm_bits!(IMM1, 1); - let r = _mm256_extracti32x4_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm256_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -23867,10 +25165,12 @@ pub unsafe fn _mm256_mask_extracti32x4_epi32( assert_instr(vextracti32x4, IMM1 = 1) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: __m256i) -> __m128i { - static_assert_uimm_bits!(IMM1, 1); - let r = _mm256_extracti32x4_epi32::(a); - transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) +pub fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: __m256i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM1, 1); + let r = _mm256_extracti32x4_epi32::(a); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst. @@ -23880,9 +25180,11 @@ pub unsafe fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 { - let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); - transmute(r) +pub fn _mm512_moveldup_ps(a: __m512) -> __m512 { + unsafe { + let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + transmute(r) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23892,9 +25194,12 @@ pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); - transmute(simd_select_bitmask(k, mov, src.as_f32x16())) +pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + transmute(simd_select_bitmask(k, mov, src.as_f32x16())) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23904,9 +25209,12 @@ pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); - transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) +pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
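A brief sketch of the even-lane duplicate converted above (same nightly assumptions, hypothetical name).

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn dup_even_lanes(a: __m512) -> __m512 {
    // Result lanes: [a0, a0, a2, a2, ..., a14, a14].
    _mm512_moveldup_ps(a)
}
```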
@@ -23916,9 +25224,11 @@ pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - let mov = _mm256_moveldup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8())) +pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8())) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23928,9 +25238,11 @@ pub unsafe fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 { - let mov = _mm256_moveldup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23940,9 +25252,11 @@ pub unsafe fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - let mov = _mm_moveldup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4())) +pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4())) + } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23952,9 +25266,11 @@ pub unsafe fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsldup))] -pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 { - let mov = _mm_moveldup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_moveldup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst. 
@@ -23964,9 +25280,11 @@ pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 { - let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); - transmute(r) +pub fn _mm512_movehdup_ps(a: __m512) -> __m512 { + unsafe { + let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + transmute(r) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23976,9 +25294,12 @@ pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); - transmute(simd_select_bitmask(k, mov, src.as_f32x16())) +pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + transmute(simd_select_bitmask(k, mov, src.as_f32x16())) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -23988,9 +25309,12 @@ pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 { - let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); - transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) +pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 { + unsafe { + let mov: f32x16 = + simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24000,9 +25324,11 @@ pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { - let mov = _mm256_movehdup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8())) +pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8())) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -24012,9 +25338,11 @@ pub unsafe fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 { - let mov = _mm256_movehdup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 { + unsafe { + let mov = _mm256_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24024,9 +25352,11 @@ pub unsafe fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - let mov = _mm_movehdup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4())) +pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4())) + } } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24036,9 +25366,11 @@ pub unsafe fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovshdup))] -pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 { - let mov = _mm_movehdup_ps(a); - transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) +pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let mov = _mm_movehdup_ps(a); + transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst. @@ -24048,9 +25380,11 @@ pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d { - let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); - transmute(r) +pub fn _mm512_movedup_pd(a: __m512d) -> __m512d { + unsafe { + let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + transmute(r) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24060,9 +25394,11 @@ pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { - let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); - transmute(simd_select_bitmask(k, mov, src.as_f64x8())) +pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + transmute(simd_select_bitmask(k, mov, src.as_f64x8())) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24072,9 +25408,11 @@ pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d { - let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); - transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) +pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d { + unsafe { + let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); + transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24084,9 +25422,11 @@ pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { - let mov = _mm256_movedup_pd(a); - transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4())) +pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = _mm256_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4())) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24096,9 +25436,11 @@ pub unsafe fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d { - let mov = _mm256_movedup_pd(a); - transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO)) +pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d { + unsafe { + let mov = _mm256_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO)) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24108,9 +25450,11 @@ pub unsafe fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { - let mov = _mm_movedup_pd(a); - transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2())) +pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = _mm_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2())) + } } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24120,9 +25464,11 @@ pub unsafe fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovddup))] -pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d { - let mov = _mm_movedup_pd(a); - transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO)) +pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d { + unsafe { + let mov = _mm_movedup_pd(a); + transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8. @@ -24133,29 +25479,39 @@ pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i) -> __m512i { - static_assert_uimm_bits!(IMM8, 2); - let a = a.as_i32x16(); - let b = _mm512_castsi128_si512(b).as_i32x16(); - let ret: i32x16 = match IMM8 & 0b11 { - 0 => simd_shuffle!( - a, - b, - [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - ), - 1 => simd_shuffle!( - a, - b, - [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], - ), - 2 => simd_shuffle!( - a, - b, - [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], - ), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]), - }; - transmute(ret) +pub fn _mm512_inserti32x4(a: __m512i, b: __m128i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let a = a.as_i32x16(); + let b = _mm512_castsi128_si512(b).as_i32x16(); + let ret: i32x16 = match IMM8 & 0b11 { + 0 => { + simd_shuffle!( + a, + b, + [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ) + } + 1 => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], + ) + } + 2 => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], + ) + } + _ => { + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]) + } + }; + transmute(ret) + } } /// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24166,15 +25522,17 @@ pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_inserti32x4( +pub fn _mm512_mask_inserti32x4( src: __m512i, k: __mmask16, a: __m512i, b: __m128i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 2); - let r = _mm512_inserti32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_inserti32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } } /// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24185,14 +25543,12 @@ pub unsafe fn _mm512_mask_inserti32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_inserti32x4( - k: __mmask16, - a: __m512i, - b: __m128i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 2); - let r = _mm512_inserti32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) +pub fn _mm512_maskz_inserti32x4(k: __mmask16, a: __m512i, b: __m128i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_inserti32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8. @@ -24206,15 +25562,17 @@ pub unsafe fn _mm512_maskz_inserti32x4( assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4 )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_inserti32x4(a: __m256i, b: __m128i) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let a = a.as_i32x8(); - let b = _mm256_castsi128_si256(b).as_i32x8(); - let ret: i32x8 = match IMM8 & 0b1 { - 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), - }; - transmute(ret) +pub fn _mm256_inserti32x4(a: __m256i, b: __m128i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let a = a.as_i32x8(); + let b = _mm256_castsi128_si256(b).as_i32x8(); + let ret: i32x8 = match IMM8 & 0b1 { + 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + }; + transmute(ret) + } } /// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24228,15 +25586,17 @@ pub unsafe fn _mm256_inserti32x4(a: __m256i, b: __m128i) -> __m assert_instr(vinserti32x4, IMM8 = 1) )] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_inserti32x4( +pub fn _mm256_mask_inserti32x4( src: __m256i, k: __mmask8, a: __m256i, b: __m128i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm256_inserti32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_inserti32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + } } /// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24250,14 +25610,12 @@ pub unsafe fn _mm256_mask_inserti32x4( assert_instr(vinserti32x4, IMM8 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_inserti32x4( - k: __mmask8, - a: __m256i, - b: __m128i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm256_inserti32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) +pub fn _mm256_maskz_inserti32x4(k: __mmask8, a: __m256i, b: __m128i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_inserti32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) + } } /// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8. @@ -24268,12 +25626,14 @@ pub unsafe fn _mm256_maskz_inserti32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4 #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i) -> __m512i { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_castsi256_si512(b); - match IMM8 & 0b1 { - 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), +pub fn _mm512_inserti64x4(a: __m512i, b: __m256i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_castsi256_si512(b); + match IMM8 & 0b1 { + 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + } } } @@ -24285,15 +25645,17 @@ pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_inserti64x4( +pub fn _mm512_mask_inserti64x4( src: __m512i, k: __mmask8, a: __m512i, b: __m256i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm512_inserti64x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_inserti64x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + } } /// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
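Sketch of the 256-bit integer insert converted above (same nightly assumptions, hypothetical name).

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn set_high_256(a: __m512i, b: __m256i) -> __m512i {
    // IMM8 = 1 places `b` into the upper 256 bits of `a`.
    _mm512_inserti64x4::<1>(a, b)
}
```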
@@ -24304,14 +25666,12 @@ pub unsafe fn _mm512_mask_inserti64x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_inserti64x4( - k: __mmask8, - a: __m512i, - b: __m256i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm512_inserti64x4::(a, b); - transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) +pub fn _mm512_maskz_inserti64x4(k: __mmask8, a: __m512i, b: __m256i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_inserti64x4::(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8. @@ -24322,26 +25682,36 @@ pub unsafe fn _mm512_maskz_inserti64x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128) -> __m512 { - static_assert_uimm_bits!(IMM8, 2); - let b = _mm512_castps128_ps512(b); - match IMM8 & 0b11 { - 0 => simd_shuffle!( - a, - b, - [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - ), - 1 => simd_shuffle!( - a, - b, - [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], - ), - 2 => simd_shuffle!( - a, - b, - [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], - ), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]), +pub fn _mm512_insertf32x4(a: __m512, b: __m128) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let b = _mm512_castps128_ps512(b); + match IMM8 & 0b11 { + 0 => { + simd_shuffle!( + a, + b, + [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ) + } + 1 => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15], + ) + } + 2 => { + simd_shuffle!( + a, + b, + [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15], + ) + } + _ => { + simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]) + } + } } } @@ -24353,15 +25723,17 @@ pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128) -> __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_insertf32x4( +pub fn _mm512_mask_insertf32x4( src: __m512, k: __mmask16, a: __m512, b: __m128, ) -> __m512 { - static_assert_uimm_bits!(IMM8, 2); - let r = _mm512_insertf32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_insertf32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16())) + } } /// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -24372,14 +25744,12 @@ pub unsafe fn _mm512_mask_insertf32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_insertf32x4( - k: __mmask16, - a: __m512, - b: __m128, -) -> __m512 { - static_assert_uimm_bits!(IMM8, 2); - let r = _mm512_insertf32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) +pub fn _mm512_maskz_insertf32x4(k: __mmask16, a: __m512, b: __m128) -> __m512 { + unsafe { + static_assert_uimm_bits!(IMM8, 2); + let r = _mm512_insertf32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) + } } /// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8. @@ -24393,12 +25763,14 @@ pub unsafe fn _mm512_maskz_insertf32x4( assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4 )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_insertf32x4(a: __m256, b: __m128) -> __m256 { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm256_castps128_ps256(b); - match IMM8 & 0b1 { - 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), +pub fn _mm256_insertf32x4(a: __m256, b: __m128) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm256_castps128_ps256(b); + match IMM8 & 0b1 { + 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + } } } @@ -24413,15 +25785,17 @@ pub unsafe fn _mm256_insertf32x4(a: __m256, b: __m128) -> __m25 assert_instr(vinsertf32x4, IMM8 = 1) )] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_insertf32x4( +pub fn _mm256_mask_insertf32x4( src: __m256, k: __mmask8, a: __m256, b: __m128, ) -> __m256 { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm256_insertf32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_insertf32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8())) + } } /// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24435,14 +25809,12 @@ pub unsafe fn _mm256_mask_insertf32x4( assert_instr(vinsertf32x4, IMM8 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_insertf32x4( - k: __mmask8, - a: __m256, - b: __m128, -) -> __m256 { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm256_insertf32x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) +pub fn _mm256_maskz_insertf32x4(k: __mmask8, a: __m256, b: __m128) -> __m256 { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm256_insertf32x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) + } } /// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8. 
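Sketch for the 256-bit float insert converted above (same nightly assumptions, hypothetical name); these VL variants need both features enabled on the caller.

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

// The 256-bit variants also require AVX512VL, so the caller enables both.
#[target_feature(enable = "avx512f,avx512vl")]
fn splice_low_128(a: __m256, b: __m128) -> __m256 {
    // IMM8 = 0 replaces the low 128 bits of `a` with `b`.
    _mm256_insertf32x4::<0>(a, b)
}
```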
@@ -24453,12 +25825,14 @@ pub unsafe fn _mm256_maskz_insertf32x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d) -> __m512d { - static_assert_uimm_bits!(IMM8, 1); - let b = _mm512_castpd256_pd512(b); - match IMM8 & 0b1 { - 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), - _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), +pub fn _mm512_insertf64x4(a: __m512d, b: __m256d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let b = _mm512_castpd256_pd512(b); + match IMM8 & 0b1 { + 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), + _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), + } } } @@ -24470,15 +25844,17 @@ pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_insertf64x4( +pub fn _mm512_mask_insertf64x4( src: __m512d, k: __mmask8, a: __m512d, b: __m256d, ) -> __m512d { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm512_insertf64x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_insertf64x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8())) + } } /// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24489,14 +25865,12 @@ pub unsafe fn _mm512_mask_insertf64x4( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_insertf64x4( - k: __mmask8, - a: __m512d, - b: __m256d, -) -> __m512d { - static_assert_uimm_bits!(IMM8, 1); - let r = _mm512_insertf64x4::(a, b); - transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) +pub fn _mm512_maskz_insertf64x4(k: __mmask8, a: __m512d, b: __m256d) -> __m512d { + unsafe { + static_assert_uimm_bits!(IMM8, 1); + let r = _mm512_insertf64x4::(a, b); + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. 
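Sketch of the zero-masked double-precision insert converted above (same nightly assumptions, hypothetical name).

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn insert_high_then_zero(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
    // Insert `b` into the upper half, then zero lanes whose mask bit is clear.
    _mm512_maskz_insertf64x4::<1>(k, a, b)
}
```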
@@ -24506,18 +25880,20 @@ pub unsafe fn _mm512_maskz_insertf64x4( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq -pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i32x16(); - let b = b.as_i32x16(); - #[rustfmt::skip] - let r: i32x16 = simd_shuffle!( - a, b, - [ 2, 18, 3, 19, - 2 + 4, 18 + 4, 3 + 4, 19 + 4, - 2 + 8, 18 + 8, 3 + 8, 19 + 8, - 2 + 12, 18 + 12, 3 + 12, 19 + 12], - ); - transmute(r) +pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let b = b.as_i32x16(); + #[rustfmt::skip] + let r: i32x16 = simd_shuffle!( + a, b, + [ 2, 18, 3, 19, + 2 + 4, 18 + 4, 3 + 4, 19 + 4, + 2 + 8, 18 + 8, 3 + 8, 19 + 8, + 2 + 12, 18 + 12, 3 + 12, 19 + 12], + ); + transmute(r) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24527,14 +25903,11 @@ pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhdq))] -pub unsafe fn _mm512_mask_unpackhi_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16())) +pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16())) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24544,9 +25917,11 @@ pub unsafe fn _mm512_mask_unpackhi_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhdq))] -pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO)) +pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO)) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24556,14 +25931,11 @@ pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhdq))] -pub unsafe fn _mm256_mask_unpackhi_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8())) +pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8())) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24573,9 +25945,11 @@ pub unsafe fn _mm256_mask_unpackhi_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhdq))] -pub unsafe fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO)) +pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO)) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24585,14 +25959,11 @@ pub unsafe fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhdq))] -pub unsafe fn _mm_mask_unpackhi_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4())) +pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4())) + } } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24602,9 +25973,11 @@ pub unsafe fn _mm_mask_unpackhi_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhdq))] -pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO)) +pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO)) + } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. 
@@ -24614,8 +25987,8 @@ pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq -pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i { - simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) +pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24625,14 +25998,11 @@ pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhqdq))] -pub unsafe fn _mm512_mask_unpackhi_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8())) +pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8())) + } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24642,9 +26012,11 @@ pub unsafe fn _mm512_mask_unpackhi_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhqdq))] -pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO)) +pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO)) + } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24654,14 +26026,11 @@ pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhqdq))] -pub unsafe fn _mm256_mask_unpackhi_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4())) +pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4())) + } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
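Sketch covering the integer unpack-high conversions above (same nightly assumptions, hypothetical name), showing the plain 32-bit form next to the merge-masked 64-bit form.

```rust
#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn interleave_high(a: __m512i, b: __m512i, src: __m512i, k: __mmask8) -> (__m512i, __m512i) {
    (
        // Per 128-bit lane: [a2, b2, a3, b3] as 32-bit elements.
        _mm512_unpackhi_epi32(a, b),
        // 64-bit variant, merge-masked against `src`.
        _mm512_mask_unpackhi_epi64(src, k, a, b),
    )
}
```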
@@ -24671,9 +26040,11 @@ pub unsafe fn _mm256_mask_unpackhi_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhqdq))] -pub unsafe fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO)) +pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO)) + } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24683,14 +26054,11 @@ pub unsafe fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhqdq))] -pub unsafe fn _mm_mask_unpackhi_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2())) +pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2())) + } } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24700,9 +26068,11 @@ pub unsafe fn _mm_mask_unpackhi_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckhqdq))] -pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO)) +pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO)) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst. 
@@ -24712,15 +26082,17 @@ pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 { - #[rustfmt::skip] - simd_shuffle!( - a, b, - [ 2, 18, 3, 19, - 2 + 4, 18 + 4, 3 + 4, 19 + 4, - 2 + 8, 18 + 8, 3 + 8, 19 + 8, - 2 + 12, 18 + 12, 3 + 12, 19 + 12], - ) +pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + #[rustfmt::skip] + simd_shuffle!( + a, b, + [ 2, 18, 3, 19, + 2 + 4, 18 + 4, 3 + 4, 19 + 4, + 2 + 8, 18 + 8, 3 + 8, 19 + 8, + 2 + 12, 18 + 12, 3 + 12, 19 + 12], + ) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24730,9 +26102,11 @@ pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16())) +pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16())) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24742,9 +26116,11 @@ pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO)) +pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO)) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24754,9 +26130,11 @@ pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8())) +pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8())) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24766,9 +26144,11 @@ pub unsafe fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO)) +pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO)) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24778,9 +26158,11 @@ pub unsafe fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4())) +pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4())) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -24790,9 +26172,11 @@ pub unsafe fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhps))] -pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO)) +pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO)) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -24802,8 +26186,8 @@ pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d { - simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) +pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24813,14 +26197,11 @@ pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm512_mask_unpackhi_pd( - src: __m512d, - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { - let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8())) +pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8())) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24830,9 +26211,11 @@ pub unsafe fn _mm512_mask_unpackhi_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO)) +pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO)) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24842,14 +26225,11 @@ pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm256_mask_unpackhi_pd( - src: __m256d, - k: __mmask8, - a: __m256d, - b: __m256d, -) -> __m256d { - let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4())) +pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4())) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24859,9 +26239,11 @@ pub unsafe fn _mm256_mask_unpackhi_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO)) +pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO)) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24871,9 +26253,11 @@ pub unsafe fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2())) +pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2())) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
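All of the `mask_`/`maskz_` wrappers above share one body shape: compute the full unpack, then select per element with `simd_select_bitmask`, falling back to `src` (writemask) or zero (zeromask). A plain-Rust model of that selection, for illustration only (the function name is ours, not stdarch API):

```rust
// Scalar reference model of the writemask/zeromask selection: bit i of `k`
// picks the computed element, otherwise the fallback (the `src` element for
// `mask_`, or zero for the `maskz_` form).
fn select_bitmask<T: Copy, const N: usize>(k: u16, computed: [T; N], fallback: [T; N]) -> [T; N] {
    let mut out = fallback;
    for i in 0..N {
        if (k >> i) & 1 == 1 {
            out[i] = computed[i];
        }
    }
    out
}

fn main() {
    // Zeromask over 4 lanes: only lanes 0 and 2 keep the computed values.
    assert_eq!(select_bitmask(0b0101, [10, 20, 30, 40], [0; 4]), [10, 0, 30, 0]);
}
```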
@@ -24883,9 +26267,11 @@ pub unsafe fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpckhpd))] -pub unsafe fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO)) +pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO)) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -24895,18 +26281,20 @@ pub unsafe fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m1 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq -pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i { - let a = a.as_i32x16(); - let b = b.as_i32x16(); - #[rustfmt::skip] - let r: i32x16 = simd_shuffle!( - a, b, - [ 0, 16, 1, 17, - 0 + 4, 16 + 4, 1 + 4, 17 + 4, - 0 + 8, 16 + 8, 1 + 8, 17 + 8, - 0 + 12, 16 + 12, 1 + 12, 17 + 12], - ); - transmute(r) +pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let a = a.as_i32x16(); + let b = b.as_i32x16(); + #[rustfmt::skip] + let r: i32x16 = simd_shuffle!( + a, b, + [ 0, 16, 1, 17, + 0 + 4, 16 + 4, 1 + 4, 17 + 4, + 0 + 8, 16 + 8, 1 + 8, 17 + 8, + 0 + 12, 16 + 12, 1 + 12, 17 + 12], + ); + transmute(r) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24916,14 +26304,11 @@ pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckldq))] -pub unsafe fn _mm512_mask_unpacklo_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16())) +pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16())) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -24933,9 +26318,11 @@ pub unsafe fn _mm512_mask_unpacklo_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckldq))] -pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO)) +pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO)) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24945,14 +26332,11 @@ pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckldq))] -pub unsafe fn _mm256_mask_unpacklo_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8())) +pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8())) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24962,9 +26346,11 @@ pub unsafe fn _mm256_mask_unpacklo_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckldq))] -pub unsafe fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO)) +pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO)) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24974,14 +26360,11 @@ pub unsafe fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckldq))] -pub unsafe fn _mm_mask_unpacklo_epi32( - src: __m128i, - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4())) +pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4())) + } } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -24991,9 +26374,11 @@ pub unsafe fn _mm_mask_unpacklo_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpckldq))] -pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO)) +pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO)) + } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -25003,8 +26388,8 @@ pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq -pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i { - simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) +pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25014,14 +26399,11 @@ pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklqdq))] -pub unsafe fn _mm512_mask_unpacklo_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8())) +pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8())) + } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -25031,9 +26413,11 @@ pub unsafe fn _mm512_mask_unpacklo_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklqdq))] -pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO)) +pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO)) + } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25043,14 +26427,11 @@ pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklqdq))] -pub unsafe fn _mm256_mask_unpacklo_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4())) +pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4())) + } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25060,9 +26441,11 @@ pub unsafe fn _mm256_mask_unpacklo_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklqdq))] -pub unsafe fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO)) +pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO)) + } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25072,14 +26455,11 @@ pub unsafe fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklqdq))] -pub unsafe fn _mm_mask_unpacklo_epi64( - src: __m128i, - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2())) +pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2())) + } } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25089,9 +26469,11 @@ pub unsafe fn _mm_mask_unpacklo_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpunpcklqdq))] -pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO)) +pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO)) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -25101,14 +26483,16 @@ pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 { - #[rustfmt::skip] - simd_shuffle!(a, b, - [ 0, 16, 1, 17, - 0 + 4, 16 + 4, 1 + 4, 17 + 4, - 0 + 8, 16 + 8, 1 + 8, 17 + 8, - 0 + 12, 16 + 12, 1 + 12, 17 + 12], - ) +pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 { + unsafe { + #[rustfmt::skip] + simd_shuffle!(a, b, + [ 0, 16, 1, 17, + 0 + 4, 16 + 4, 1 + 4, 17 + 4, + 0 + 8, 16 + 8, 1 + 8, 17 + 8, + 0 + 12, 16 + 12, 1 + 12, 17 + 12], + ) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
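The literal shuffle masks in `_mm512_unpacklo_ps` above (and in `_mm512_unpackhi_ps` earlier) are one per-128-bit-lane pattern written out: `{0, 16, 1, 17}` for the low unpack and `{2, 18, 3, 19}` for the high one, offset by the lane base. A small sketch (ours, purely illustrative) that regenerates those index lists:

```rust
// Regenerates the 16-element shuffle index lists used by the 512-bit `ps`
// unpacks: per 128-bit lane (base = 0, 4, 8, 12) the low unpack takes
// {base, base+16, base+1, base+17}; the high unpack shifts that pattern by 2.
fn unpack_ps_indices(high: bool) -> [u32; 16] {
    let start = if high { 2 } else { 0 };
    let mut idx = [0u32; 16];
    for lane in 0..4u32 {
        let base = 4 * lane;
        let quad = [base + start, base + start + 16, base + start + 1, base + start + 17];
        for (j, v) in quad.into_iter().enumerate() {
            idx[(4 * lane) as usize + j] = v;
        }
    }
    idx
}

fn main() {
    // First lane matches the literal mask in `_mm512_unpacklo_ps` above.
    assert_eq!(&unpack_ps_indices(false)[..4], &[0u32, 16, 1, 17]);
}
```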
@@ -25118,9 +26502,11 @@ pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { - let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16())) +pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16())) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25130,9 +26516,11 @@ pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); - transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO)) +pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { + let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO)) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25142,9 +26530,11 @@ pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { - let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8())) +pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8())) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -25154,9 +26544,11 @@ pub unsafe fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); - transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO)) +pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { + let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); + transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO)) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25166,9 +26558,11 @@ pub unsafe fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4())) +pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4())) + } } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25178,9 +26572,11 @@ pub unsafe fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklps))] -pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); - transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO)) +pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); + transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO)) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -25190,8 +26586,8 @@ pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d { - simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) +pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d { + unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25201,14 +26597,11 @@ pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm512_mask_unpacklo_pd( - src: __m512d, - k: __mmask8, - a: __m512d, - b: __m512d, -) -> __m512d { - let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8())) +pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8())) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25218,9 +26611,11 @@ pub unsafe fn _mm512_mask_unpacklo_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); - transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO)) +pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { + let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO)) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25230,14 +26625,11 @@ pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm256_mask_unpacklo_pd( - src: __m256d, - k: __mmask8, - a: __m256d, - b: __m256d, -) -> __m256d { - let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4())) +pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4())) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -25247,9 +26639,11 @@ pub unsafe fn _mm256_mask_unpacklo_pd( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); - transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO)) +pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { + let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); + transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO)) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25259,9 +26653,11 @@ pub unsafe fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2())) +pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2())) + } } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25271,9 +26667,11 @@ pub unsafe fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vunpcklpd))] -pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); - transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO)) +pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); + transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO)) + } } /// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25282,12 +26680,14 @@ pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m1 #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { - simd_shuffle!( - a, - _mm_undefined_ps(), - [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], - ) +pub fn _mm512_castps128_ps512(a: __m128) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm_undefined_ps(), + [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], + ) + } } /// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
@@ -25296,12 +26696,14 @@ pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 { - simd_shuffle!( - a, - _mm256_undefined_ps(), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], - ) +pub fn _mm512_castps256_ps512(a: __m256) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ps(), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } } /// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25310,12 +26712,14 @@ pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 { - simd_shuffle!( - a, - _mm_set1_ps(0.), - [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], - ) +pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm_set1_ps(0.), + [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], + ) + } } /// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25324,12 +26728,14 @@ pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 { - simd_shuffle!( - a, - _mm256_set1_ps(0.), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], - ) +pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 { + unsafe { + simd_shuffle!( + a, + _mm256_set1_ps(0.), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], + ) + } } /// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25338,8 +26744,8 @@ pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 { - simd_shuffle!(a, a, [0, 1, 2, 3]) +pub fn _mm512_castps512_ps128(a: __m512) -> __m128 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } /// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25348,8 +26754,8 @@ pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 { - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) +pub fn _mm512_castps512_ps256(a: __m512) -> __m256 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } /// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
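The `cast*` and `zext*` widenings above differ only in what they promise about the new upper lanes: `cast` shuffles in undefined lanes, `zext` shuffles in zeros. A sketch of why that matters, assuming the same crate-level gates as the earlier sketch; `_mm512_reduce_add_ps` is another `avx512f` intrinsic not shown in these hunks:

```rust
use std::arch::x86_64::*;

// `_mm512_castps128_ps512` leaves lanes 4..16 undefined, `_mm512_zextps128_ps512`
// guarantees they are zero; only the zext result may be fed to a whole-vector
// reduction like this one.
#[target_feature(enable = "avx512f")]
fn widen_and_sum(a: __m128) -> f32 {
    let wide = _mm512_zextps128_ps512(a); // lanes 4..16 are guaranteed zero
    _mm512_reduce_add_ps(wide)            // so this is exactly the sum of a's lanes
}
```

With `a = _mm_set1_ps(1.0)` this returns 4.0; substituting `_mm512_castps128_ps512` would let the undefined upper lanes contribute arbitrary values.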
@@ -25358,8 +26764,8 @@ pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d { - transmute(a) +pub fn _mm512_castps_pd(a: __m512) -> __m512d { + unsafe { transmute(a) } } /// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25368,8 +26774,8 @@ pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { - transmute(a) +pub fn _mm512_castps_si512(a: __m512) -> __m512i { + unsafe { transmute(a) } } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25378,8 +26784,8 @@ pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { - simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) +pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { + unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) } } /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25388,8 +26794,8 @@ pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { - simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) +pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { + unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) } } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25398,8 +26804,8 @@ pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d { - simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) +pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d { + unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) } } /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25408,8 +26814,8 @@ pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d { - simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) +pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d { + unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) } } /// Cast vector of type __m512d to type __m128d. 
This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25418,8 +26824,8 @@ pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d { - simd_shuffle!(a, a, [0, 1]) +pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d { + unsafe { simd_shuffle!(a, a, [0, 1]) } } /// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25428,8 +26834,8 @@ pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { - simd_shuffle!(a, a, [0, 1, 2, 3]) +pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } /// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25438,8 +26844,8 @@ pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 { - transmute(a) +pub fn _mm512_castpd_ps(a: __m512d) -> __m512 { + unsafe { transmute(a) } } /// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25448,8 +26854,8 @@ pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { - transmute(a) +pub fn _mm512_castpd_si512(a: __m512d) -> __m512i { + unsafe { transmute(a) } } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25458,8 +26864,8 @@ pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { - simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) +pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i { + unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25468,8 +26874,8 @@ pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { - simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) +pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i { + unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. 
This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25478,8 +26884,8 @@ pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { - simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) +pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { + unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25488,8 +26894,8 @@ pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { - simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) +pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { + unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } } /// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25498,8 +26904,8 @@ pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i { - simd_shuffle!(a, a, [0, 1]) +pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i { + unsafe { simd_shuffle!(a, a, [0, 1]) } } /// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25508,8 +26914,8 @@ pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i { - simd_shuffle!(a, a, [0, 1, 2, 3]) +pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) } } /// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25518,8 +26924,8 @@ pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 { - transmute(a) +pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 { + unsafe { transmute(a) } } /// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25528,8 +26934,8 @@ pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d { - transmute(a) +pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d { + unsafe { transmute(a) } } /// Copy the lower 32-bit integer in a to dst. 
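The `castsi512_*`/`cast*_si512` conversions above are pure bit reinterpretations (or, for 512 to 128/256 bits, truncating shuffles), so chaining them is free at runtime. A sketch under the same assumptions as above:

```rust
use std::arch::x86_64::*;

// Round-tripping through __m512 and __m512d returns the same bits and compiles
// to nothing; the 512 -> 128 cast keeps only the low 128 bits.
#[target_feature(enable = "avx512f")]
fn low_dword(v: __m512i) -> i32 {
    let same_bits = _mm512_castpd_si512(_mm512_castps_pd(_mm512_castsi512_ps(v)));
    // Keep the low 128 bits, then read the low 32-bit element.
    _mm_cvtsi128_si32(_mm512_castsi512_si128(same_bits))
}
```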
@@ -25539,8 +26945,8 @@ pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))] -pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { - simd_extract!(a.as_i32x16(), 0) +pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { + unsafe { simd_extract!(a.as_i32x16(), 0) } } /// Copy the lower single-precision (32-bit) floating-point element of a to dst. @@ -25549,8 +26955,8 @@ pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtss_f32(a: __m512) -> f32 { - simd_extract!(a, 0) +pub fn _mm512_cvtss_f32(a: __m512) -> f32 { + unsafe { simd_extract!(a, 0) } } /// Copy the lower double-precision (64-bit) floating-point element of a to dst. @@ -25559,8 +26965,8 @@ pub unsafe fn _mm512_cvtss_f32(a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_cvtsd_f64(a: __m512d) -> f64 { - simd_extract!(a, 0) +pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 { + unsafe { simd_extract!(a, 0) } } /// Broadcast the low packed 32-bit integer from a to all elements of dst. @@ -25570,10 +26976,12 @@ pub unsafe fn _mm512_cvtsd_f64(a: __m512d) -> f64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i { - let a = _mm512_castsi128_si512(a).as_i32x16(); - let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); - transmute(ret) +pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i { + unsafe { + let a = _mm512_castsi128_si512(a).as_i32x16(); + let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + transmute(ret) + } } /// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25583,9 +26991,11 @@ pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, broadcast, src.as_i32x16())) +pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x16())) + } } /// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
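`_mm512_cvtsi512_si32`, `_mm512_cvtss_f32` and `_mm512_cvtsd_f64` simply extract element 0, which together with the now-safe constructors allows quick sanity checks with no `unsafe` at the call site. Illustrative sketch (the `set1` constructors are other `avx512f` intrinsics, assumed converted by the same patch):

```rust
use std::arch::x86_64::*;

// Each cvt* below is a plain extract of element 0 of its input vector.
#[target_feature(enable = "avx512f")]
fn lowest_lanes() -> (i32, f32, f64) {
    let v = _mm512_set1_epi32(7);
    let s = _mm512_set1_ps(1.5);
    let d = _mm512_set1_pd(2.5);
    (_mm512_cvtsi512_si32(v), _mm512_cvtss_f32(s), _mm512_cvtsd_f64(d)) // (7, 1.5, 2.5)
}
```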
@@ -25595,9 +27005,11 @@ pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) +pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) + } } /// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25607,9 +27019,11 @@ pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_i32x8())) +pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x8())) + } } /// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25619,9 +27033,11 @@ pub unsafe fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) +pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) + } } /// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25631,9 +27047,11 @@ pub unsafe fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, broadcast, src.as_i32x4())) +pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x4())) + } } /// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -25643,9 +27061,11 @@ pub unsafe fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd -pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO)) +pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO)) + } } /// Broadcast the low packed 64-bit integer from a to all elements of dst. @@ -25655,8 +27075,8 @@ pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { - simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) +pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { + unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } } /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25666,9 +27086,11 @@ pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_i64x8())) +pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x8())) + } } /// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25678,9 +27100,11 @@ pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) +pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) + } } /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25690,9 +27114,11 @@ pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, broadcast, src.as_i64x4())) +pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x4())) + } } /// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25702,9 +27128,11 @@ pub unsafe fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); - transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO)) +pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO)) + } } /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25714,9 +27142,11 @@ pub unsafe fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, broadcast, src.as_i64x2())) +pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x2())) + } } /// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25726,9 +27156,11 @@ pub unsafe fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq -pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i { - let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); - transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO)) +pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO)) + } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst. 
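// Illustrative sketch (not part of the patch): how the masked integer broadcasts above
// behave once they are safe `fn`s. Assumes a nightly toolchain with
// `#![feature(stdarch_x86_avx512)]` (these intrinsics are still gated as unstable in this
// diff) and that the caller has already verified `is_x86_feature_detected!("avx512f")`.
// The helper name is hypothetical.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn broadcastd_mask_demo() {
    use core::arch::x86_64::*;
    let src = _mm512_set1_epi32(-1);
    let a = _mm_set1_epi32(7);
    // Writemask: lanes whose mask bit is 0 keep the corresponding lane of `src`.
    let w = _mm512_mask_broadcastd_epi32(src, 0b0000_0000_1111_1111, a);
    // Zeromask: lanes whose mask bit is 0 are zeroed instead.
    let z = _mm512_maskz_broadcastd_epi32(0b0000_0000_1111_1111, a);
    assert_eq!(_mm512_reduce_add_epi32(w), 7 * 8 + (-1) * 8); // 48
    assert_eq!(_mm512_reduce_add_epi32(z), 7 * 8); // 56
}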
@@ -25738,8 +27170,8 @@ pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 { - simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 { + unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25749,9 +27181,11 @@ pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 { - let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); - transmute(simd_select_bitmask(k, broadcast, src.as_f32x16())) +pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x16())) + } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25761,9 +27195,11 @@ pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 { - let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); - transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) +pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) + } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25773,9 +27209,11 @@ pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 { - let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_f32x8())) +pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x8())) + } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -25785,9 +27223,11 @@ pub unsafe fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 { - let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); - transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) +pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) + } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25797,9 +27237,11 @@ pub unsafe fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { - let broadcast = _mm_broadcastss_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, broadcast, src.as_f32x4())) +pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { + unsafe { + let broadcast = _mm_broadcastss_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x4())) + } } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25809,9 +27251,11 @@ pub unsafe fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastss))] -pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 { - let broadcast = _mm_broadcastss_ps(a).as_f32x4(); - transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO)) +pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 { + unsafe { + let broadcast = _mm_broadcastss_ps(a).as_f32x4(); + transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO)) + } } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst. @@ -25821,8 +27265,8 @@ pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastsd))] -pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d { - simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) +pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d { + unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25832,9 +27276,11 @@ pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastsd))] -pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { - let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_f64x8())) +pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f64x8())) + } } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25844,9 +27290,11 @@ pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastsd))] -pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d { - let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); - transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) +pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) + } } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25856,9 +27304,11 @@ pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastsd))] -pub unsafe fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { - let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, broadcast, src.as_f64x4())) +pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, broadcast, src.as_f64x4())) + } } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25868,9 +27318,11 @@ pub unsafe fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vbroadcastsd))] -pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d { - let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); - transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO)) +pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d { + unsafe { + let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO)) + } } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst. 
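// Illustrative sketch, same assumptions as the broadcastd example above: the element-wise
// float broadcast with a writemask. The helper name is hypothetical.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn broadcastss_mask_demo() {
    use core::arch::x86_64::*;
    let src = _mm512_set1_ps(0.0);
    let a = _mm_set_ss(2.5); // low lane 2.5, upper lanes 0.0
    // Only the even-numbered lanes receive 2.5; odd lanes keep `src` (0.0).
    let r = _mm512_mask_broadcastss_ps(src, 0b0101_0101_0101_0101, a);
    assert_eq!(_mm512_reduce_add_ps(r), 8.0 * 2.5); // exact: 20.0
}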
@@ -25879,10 +27331,12 @@ pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i { - let a = a.as_i32x4(); - let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]); - transmute(ret) +pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i { + unsafe { + let a = a.as_i32x4(); + let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]); + transmute(ret) + } } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25891,9 +27345,11 @@ pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); - transmute(simd_select_bitmask(k, broadcast, src.as_i32x16())) +pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x16())) + } } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25902,9 +27358,11 @@ pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i { - let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); - transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) +pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); + transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) + } } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst. @@ -25913,10 +27371,12 @@ pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i { - let a = a.as_i32x4(); - let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]); - transmute(ret) +pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i { + unsafe { + let a = a.as_i32x4(); + let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]); + transmute(ret) + } } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25925,9 +27385,11 @@ pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i { #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_i32x8())) +pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i32x8())) + } } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -25936,9 +27398,11 @@ pub unsafe fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i { - let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); - transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) +pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i { + unsafe { + let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); + transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) + } } /// Broadcast the 4 packed 64-bit integers from a to all elements of dst. @@ -25947,8 +27411,8 @@ pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i { - simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) +pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } } /// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25957,9 +27421,11 @@ pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { - let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_i64x8())) +pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_i64x8())) + } } /// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -25968,9 +27434,11 @@ pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i { - let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); - transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) +pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i { + unsafe { + let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); + transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) + } } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst. @@ -25979,8 +27447,8 @@ pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 { - simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) +pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) } } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25989,9 +27457,11 @@ pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 { - let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); - transmute(simd_select_bitmask(k, broadcast, src.as_f32x16())) +pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x16())) + } } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26000,9 +27470,11 @@ pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 { - let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); - transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) +pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 { + unsafe { + let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); + transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) + } } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst. 
@@ -26011,8 +27483,8 @@ pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 { #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 { - simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) +pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26021,9 +27493,11 @@ pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 { #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 { - let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_f32x8())) +pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f32x8())) + } } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26032,9 +27506,11 @@ pub unsafe fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) - #[inline] #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 { - let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); - transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) +pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 { + unsafe { + let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); + transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) + } } /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst. @@ -26043,8 +27519,8 @@ pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d { - simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) +pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) } } /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
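// Illustrative sketch, same assumptions as the earlier sketches: _mm512_broadcast_i32x4
// repeats the whole 128-bit source four times, unlike the per-element broadcasts above.
// The helper name is hypothetical.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn broadcast_i32x4_demo() {
    use core::arch::x86_64::*;
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let r = _mm512_broadcast_i32x4(a);
    let expected = _mm512_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
    // All 16 lanes compare equal.
    assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
}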
@@ -26053,9 +27529,11 @@ pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d { - let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); - transmute(simd_select_bitmask(k, broadcast, src.as_f64x8())) +pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, src.as_f64x8())) + } } /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26064,9 +27542,11 @@ pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) #[inline] #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d { - let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); - transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) +pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d { + unsafe { + let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); + transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) + } } /// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst. @@ -26076,8 +27556,8 @@ pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd -pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) +pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) } } /// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst. @@ -26087,8 +27567,8 @@ pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> _ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd -pub unsafe fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) +pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) } } /// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst. 
@@ -26098,8 +27578,8 @@ pub unsafe fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd -pub unsafe fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) +pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) } } /// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst. @@ -26109,8 +27589,8 @@ pub unsafe fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq -pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) +pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) } } /// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst. @@ -26120,8 +27600,8 @@ pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq -pub unsafe fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) +pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) } } /// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst. @@ -26131,8 +27611,8 @@ pub unsafe fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq -pub unsafe fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) +pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) } } /// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst. @@ -26142,8 +27622,8 @@ pub unsafe fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps -pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { - transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) +pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) } } /// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst. 
@@ -26153,8 +27633,8 @@ pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps -pub unsafe fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) +pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { + unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) } } /// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst. @@ -26164,8 +27644,8 @@ pub unsafe fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps -pub unsafe fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) +pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) } } /// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst. @@ -26175,8 +27655,8 @@ pub unsafe fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd -pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { - transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) +pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) } } /// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst. @@ -26186,8 +27666,8 @@ pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m51 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd -pub unsafe fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) +pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { + unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) } } /// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst. 
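// Illustrative sketch, same assumptions as the earlier sketches: _mm512_mask_blend_epi32
// takes a lane from `b` where the mask bit is 1 and from `a` where it is 0.
// The helper name is hypothetical.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn blend_demo() {
    use core::arch::x86_64::*;
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(100);
    // Bits 0..=3 set: the four lowest lanes come from `b`, the other twelve from `a`.
    let r = _mm512_mask_blend_epi32(0b0000_0000_0000_1111, a, b);
    assert_eq!(_mm512_reduce_add_epi32(r), 4 * 100 + 12 * 1); // 412
}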
@@ -26197,8 +27677,8 @@ pub unsafe fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd -pub unsafe fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) +pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) } } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst. @@ -26211,75 +27691,77 @@ pub unsafe fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let imm8: i32 = IMM8 % 16; - let r: i32x16 = match imm8 { - 0 => simd_shuffle!( - a, - b, - [ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - ], - ), - 1 => simd_shuffle!( - a, - b, - [ - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, - ], - ), - 2 => simd_shuffle!( - a, - b, - [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1], - ), - 3 => simd_shuffle!( - a, - b, - [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2], - ), - 4 => simd_shuffle!( - a, - b, - [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3], - ), - 5 => simd_shuffle!( - a, - b, - [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4], - ), - 6 => simd_shuffle!( - a, - b, - [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5], - ), - 7 => simd_shuffle!( - a, - b, - [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6], - ), - 8 => simd_shuffle!( - a, - b, - [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7], - ), - 9 => simd_shuffle!( - a, - b, - [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8], - ), - 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), - 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), - 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), - 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]), - 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let imm8: i32 = IMM8 % 16; + let r: i32x16 = match imm8 { + 0 => simd_shuffle!( + a, + b, + [ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ], + ), + 1 => simd_shuffle!( + a, + b, + [ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, + ], + ), + 2 => simd_shuffle!( + a, + b, + [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1], + ), + 3 => simd_shuffle!( + a, + b, + [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2], + ), + 4 => simd_shuffle!( + a, + b, + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3], + ), + 5 => 
simd_shuffle!( + a, + b, + [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4], + ), + 6 => simd_shuffle!( + a, + b, + [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5], + ), + 7 => simd_shuffle!( + a, + b, + [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6], + ), + 8 => simd_shuffle!( + a, + b, + [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7], + ), + 9 => simd_shuffle!( + a, + b, + [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8], + ), + 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), + 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), + 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]), + 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26290,15 +27772,17 @@ pub unsafe fn _mm512_mask_alignr_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_alignr_epi32<const IMM8: i32>( +pub fn _mm512_mask_alignr_epi32<const IMM8: i32>( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_alignr_epi32::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi32::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16())) + } } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26309,14 +27793,12 @@ pub unsafe fn _mm512_mask_alignr_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_alignr_epi32<const IMM8: i32>( - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_alignr_epi32::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) +pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi32::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) + } } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst. 
@@ -26329,23 +27811,25 @@ pub unsafe fn _mm512_maskz_alignr_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let imm8: i32 = IMM8 % 8; - let r: i32x8 = match imm8 { - 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), - 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), - 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), - 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), - 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), - 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), - 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let imm8: i32 = IMM8 % 8; + let r: i32x8 = match imm8 { + 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), + 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), + 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), + 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), + 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), + 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), + 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26356,15 +27840,17 @@ pub unsafe fn _mm256_alignr_epi32(a: __m256i, b: __m256i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_alignr_epi32<const IMM8: i32>( +pub fn _mm256_mask_alignr_epi32<const IMM8: i32>( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm256_alignr_epi32::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi32::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8())) + } } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -26375,14 +27861,12 @@ pub unsafe fn _mm256_mask_alignr_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_alignr_epi32<const IMM8: i32>( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm256_alignr_epi32::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) +pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi32::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) + } } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst. @@ -26395,19 +27879,21 @@ pub unsafe fn _mm256_maskz_alignr_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let imm8: i32 = IMM8 % 4; - let r: i32x4 = match imm8 { - 0 => simd_shuffle!(a, b, [4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [5, 6, 7, 0]), - 2 => simd_shuffle!(a, b, [6, 7, 0, 1]), - 3 => simd_shuffle!(a, b, [7, 0, 1, 2]), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let imm8: i32 = IMM8 % 4; + let r: i32x4 = match imm8 { + 0 => simd_shuffle!(a, b, [4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [5, 6, 7, 0]), + 2 => simd_shuffle!(a, b, [6, 7, 0, 1]), + 3 => simd_shuffle!(a, b, [7, 0, 1, 2]), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26418,15 +27904,17 @@ pub unsafe fn _mm_alignr_epi32(a: __m128i, b: __m128i) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_alignr_epi32<const IMM8: i32>( +pub fn _mm_mask_alignr_epi32<const IMM8: i32>( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm_alignr_epi32::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi32::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) + } } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -26436,15 +27924,13 @@ pub unsafe fn _mm_mask_alignr_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))] -#[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_alignr_epi32<const IMM8: i32>( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm_alignr_epi32::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) +#[rustc_legacy_const_generics(3)] +pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi32::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) + } } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst. @@ -26457,21 +27943,23 @@ pub unsafe fn _mm_maskz_alignr_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let imm8: i32 = IMM8 % 8; - let r: i64x8 = match imm8 { - 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), - 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), - 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), - 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), - 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), - 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), - 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let imm8: i32 = IMM8 % 8; + let r: i64x8 = match imm8 { + 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]), + 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]), + 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]), + 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]), + 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]), + 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]), + 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -26482,15 +27970,17 @@ pub unsafe fn _mm512_alignr_epi64(a: __m512i, b: __m512i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_alignr_epi64<const IMM8: i32>( +pub fn _mm512_mask_alignr_epi64<const IMM8: i32>( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_alignr_epi64::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi64::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8())) + } } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26501,14 +27991,12 @@ pub unsafe fn _mm512_mask_alignr_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_alignr_epi64<const IMM8: i32>( - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm512_alignr_epi64::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) +pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm512_alignr_epi64::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) + } } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst. @@ -26521,17 +28009,19 @@ pub unsafe fn _mm512_maskz_alignr_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let imm8: i32 = IMM8 % 4; - let r: i64x4 = match imm8 { - 0 => simd_shuffle!(a, b, [4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [5, 6, 7, 0]), - 2 => simd_shuffle!(a, b, [6, 7, 0, 1]), - 3 => simd_shuffle!(a, b, [7, 0, 1, 2]), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let imm8: i32 = IMM8 % 4; + let r: i64x4 = match imm8 { + 0 => simd_shuffle!(a, b, [4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [5, 6, 7, 0]), + 2 => simd_shuffle!(a, b, [6, 7, 0, 1]), + 3 => simd_shuffle!(a, b, [7, 0, 1, 2]), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -26542,15 +28032,17 @@ pub unsafe fn _mm256_alignr_epi64(a: __m256i, b: __m256i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_alignr_epi64<const IMM8: i32>( +pub fn _mm256_mask_alignr_epi64<const IMM8: i32>( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm256_alignr_epi64::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi64::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) + } } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26561,14 +28053,12 @@ pub unsafe fn _mm256_mask_alignr_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_alignr_epi64<const IMM8: i32>( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm256_alignr_epi64::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) +pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm256_alignr_epi64::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) + } } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst. @@ -26581,15 +28071,17 @@ pub unsafe fn _mm256_maskz_alignr_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let imm8: i32 = IMM8 % 2; - let r: i64x2 = match imm8 { - 0 => simd_shuffle!(a, b, [2, 3]), - 1 => simd_shuffle!(a, b, [3, 0]), - _ => unreachable_unchecked(), - }; - transmute(r) +pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let imm8: i32 = IMM8 % 2; + let r: i64x2 = match imm8 { + 0 => simd_shuffle!(a, b, [2, 3]), + 1 => simd_shuffle!(a, b, [3, 0]), + _ => unreachable_unchecked(), + }; + transmute(r) + } } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
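// Illustrative sketch, same assumptions as the earlier sketches: valignd treats a:b as one
// 1024-bit value (a in the upper half), shifts it right by IMM8 32-bit elements, and keeps
// the low 16 elements. The helper name is hypothetical.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn alignr_demo() {
    use core::arch::x86_64::*;
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let a = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    // Shifting right by one element drops b[0] and pulls a[0] in at the top.
    let r = _mm512_alignr_epi32::<1>(a, b);
    let expected = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
}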
@@ -26600,15 +28092,17 @@ pub unsafe fn _mm_alignr_epi64(a: __m128i, b: __m128i) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_alignr_epi64<const IMM8: i32>( +pub fn _mm_mask_alignr_epi64<const IMM8: i32>( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm_alignr_epi64::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi64::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2())) + } } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26619,14 +28113,12 @@ pub unsafe fn _mm_mask_alignr_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_alignr_epi64<const IMM8: i32>( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let r = _mm_alignr_epi64::<IMM8>(a, b); - transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO)) +pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let r = _mm_alignr_epi64::<IMM8>(a, b); + transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO)) + } } /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. @@ -26636,8 +28128,8 @@ pub unsafe fn _mm_maskz_alignr_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq -pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_and(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } } /// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26647,9 +28139,11 @@ pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, and, src.as_i32x16())) +pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, and, src.as_i32x16())) + } } /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -26659,9 +28153,11 @@ pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, and, i32x16::ZERO)) +pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, and, i32x16::ZERO)) + } } /// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26671,9 +28167,11 @@ pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let and = simd_and(a.as_i32x8(), b.as_i32x8()); - transmute(simd_select_bitmask(k, and, src.as_i32x8())) +pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, and, src.as_i32x8())) + } } /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26683,9 +28181,11 @@ pub unsafe fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let and = simd_and(a.as_i32x8(), b.as_i32x8()); - transmute(simd_select_bitmask(k, and, i32x8::ZERO)) +pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, and, i32x8::ZERO)) + } } /// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26695,9 +28195,11 @@ pub unsafe fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let and = simd_and(a.as_i32x4(), b.as_i32x4()); - transmute(simd_select_bitmask(k, and, src.as_i32x4())) +pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, and, src.as_i32x4())) + } } /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -26707,9 +28209,11 @@ pub unsafe fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let and = simd_and(a.as_i32x4(), b.as_i32x4()); - transmute(simd_select_bitmask(k, and, i32x4::ZERO)) +pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, and, i32x4::ZERO)) + } } /// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. @@ -26719,8 +28223,8 @@ pub unsafe fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_and(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) } } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26730,9 +28234,11 @@ pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, and, src.as_i64x8())) +pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, and, src.as_i64x8())) + } } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26742,9 +28248,11 @@ pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, and, i64x8::ZERO)) +pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let and = _mm512_and_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, and, i64x8::ZERO)) + } } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
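// Illustrative sketch (not part of the patch) of how the now-safe signatures
// above are meant to be used: inside a function that enables the same target
// features, the intrinsics can be called without an `unsafe` block, while
// calls from ordinary code stay unsafe and should be gated on runtime feature
// detection. Assumes an x86-64 nightly toolchain with
// `#![feature(stdarch_x86_avx512)]`, and that `_mm512_set1_epi32` is also made
// safe by this series.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn masked_and_lanes(k: __mmask16) -> __mmask16 {
    let a = _mm512_set1_epi32(0b1100); // assumed safe after this series
    let b = _mm512_set1_epi32(0b0110);
    let r = _mm512_maskz_and_epi32(k, a, b); // 0b0100 in the lanes selected by k
    _mm512_test_epi32_mask(r, r) // mask of non-zero lanes, i.e. exactly k
}

fn main() {
    if std::arch::is_x86_feature_detected!("avx512f") {
        // SAFETY: avx512f was detected at runtime, so calling a safe
        // `#[target_feature(enable = "avx512f")]` function is sound here.
        assert_eq!(unsafe { masked_and_lanes(0x00ff) }, 0x00ff);
    }
}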
@@ -26754,9 +28262,11 @@ pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let and = simd_and(a.as_i64x4(), b.as_i64x4()); - transmute(simd_select_bitmask(k, and, src.as_i64x4())) +pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i64x4(), b.as_i64x4()); + transmute(simd_select_bitmask(k, and, src.as_i64x4())) + } } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26766,9 +28276,11 @@ pub unsafe fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let and = simd_and(a.as_i64x4(), b.as_i64x4()); - transmute(simd_select_bitmask(k, and, i64x4::ZERO)) +pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let and = simd_and(a.as_i64x4(), b.as_i64x4()); + transmute(simd_select_bitmask(k, and, i64x4::ZERO)) + } } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26778,9 +28290,11 @@ pub unsafe fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let and = simd_and(a.as_i64x2(), b.as_i64x2()); - transmute(simd_select_bitmask(k, and, src.as_i64x2())) +pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i64x2(), b.as_i64x2()); + transmute(simd_select_bitmask(k, and, src.as_i64x2())) + } } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26790,9 +28304,11 @@ pub unsafe fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let and = simd_and(a.as_i64x2(), b.as_i64x2()); - transmute(simd_select_bitmask(k, and, i64x2::ZERO)) +pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let and = simd_and(a.as_i64x2(), b.as_i64x2()); + transmute(simd_select_bitmask(k, and, i64x2::ZERO)) + } } /// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. 
@@ -26802,8 +28318,8 @@ pub unsafe fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_and(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. @@ -26813,8 +28329,8 @@ pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_or(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26824,9 +28340,11 @@ pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, or, src.as_i32x16())) +pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let or = _mm512_or_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, or, src.as_i32x16())) + } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26836,9 +28354,11 @@ pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, or, i32x16::ZERO)) +pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let or = _mm512_or_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, or, i32x16::ZERO)) + } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. @@ -26848,8 +28368,8 @@ pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vor))] //should be vpord -pub unsafe fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_or(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -26859,9 +28379,11 @@ pub unsafe fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let or = _mm256_or_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, or, src.as_i32x8())) +pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let or = _mm256_or_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, or, src.as_i32x8())) + } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26871,9 +28393,11 @@ pub unsafe fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let or = _mm256_or_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, or, i32x8::ZERO)) +pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let or = _mm256_or_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, or, i32x8::ZERO)) + } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. @@ -26883,8 +28407,8 @@ pub unsafe fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vor))] //should be vpord -pub unsafe fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_or(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26894,9 +28418,11 @@ pub unsafe fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let or = _mm_or_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, or, src.as_i32x4())) +pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let or = _mm_or_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, or, src.as_i32x4())) + } } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -26906,9 +28432,11 @@ pub unsafe fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let or = _mm_or_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, or, i32x4::ZERO)) +pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let or = _mm_or_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, or, i32x4::ZERO)) + } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. @@ -26918,8 +28446,8 @@ pub unsafe fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_or(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26929,9 +28457,11 @@ pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, or, src.as_i64x8())) +pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let or = _mm512_or_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, or, src.as_i64x8())) + } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26941,9 +28471,11 @@ pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, or, i64x8::ZERO)) +pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let or = _mm512_or_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, or, i64x8::ZERO)) + } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. 
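// Illustrative sketch (same assumptions as the sketch above): the masked forms
// in these hunks, and in the XOR and ANDNOT hunks that follow, share one
// convention. The writemask variant merges unselected lanes from `src`, while
// the zeromask variant forces them to zero.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn or_mask_demo() -> (__mmask16, __mmask16) {
    let a = _mm512_set1_epi32(0b01);
    let b = _mm512_set1_epi32(0b10);
    let src = _mm512_set1_epi32(-1); // every lane non-zero
    let k: __mmask16 = 0x000f; // only the low 4 lanes are written

    let merged = _mm512_mask_or_epi32(src, k, a, b); // unselected lanes keep `src`
    let zeroed = _mm512_maskz_or_epi32(k, a, b); // unselected lanes become 0

    (
        _mm512_test_epi32_mask(merged, merged), // 0xffff: all lanes non-zero
        _mm512_test_epi32_mask(zeroed, zeroed), // 0x000f: only the written lanes
    )
}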
@@ -26953,8 +28485,8 @@ pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m5 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vor))] //should be vporq -pub unsafe fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_or(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26964,9 +28496,11 @@ pub unsafe fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let or = _mm256_or_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, or, src.as_i64x4())) +pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let or = _mm256_or_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, or, src.as_i64x4())) + } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -26976,9 +28510,11 @@ pub unsafe fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let or = _mm256_or_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, or, i64x4::ZERO)) +pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let or = _mm256_or_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, or, i64x4::ZERO)) + } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. @@ -26988,8 +28524,8 @@ pub unsafe fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m2 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vor))] //should be vporq -pub unsafe fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_or(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -26999,9 +28535,11 @@ pub unsafe fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let or = _mm_or_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, or, src.as_i64x2())) +pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let or = _mm_or_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, or, src.as_i64x2())) + } } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27011,9 +28549,11 @@ pub unsafe fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let or = _mm_or_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, or, i64x2::ZERO)) +pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let or = _mm_or_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, or, i64x2::ZERO)) + } } /// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst. @@ -27023,8 +28563,8 @@ pub unsafe fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_or(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. @@ -27034,8 +28574,8 @@ pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord -pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -27045,9 +28585,11 @@ pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, xor, src.as_i32x16())) +pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, xor, src.as_i32x16())) + } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27057,9 +28599,11 @@ pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, xor, i32x16::ZERO)) +pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, xor, i32x16::ZERO)) + } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. @@ -27069,8 +28613,8 @@ pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxor))] //should be vpxord -pub unsafe fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27080,9 +28624,11 @@ pub unsafe fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let xor = _mm256_xor_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, xor, src.as_i32x8())) +pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, xor, src.as_i32x8())) + } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -27092,9 +28638,11 @@ pub unsafe fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let xor = _mm256_xor_epi32(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, xor, i32x8::ZERO)) +pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi32(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, xor, i32x8::ZERO)) + } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. @@ -27104,8 +28652,8 @@ pub unsafe fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxor))] //should be vpxord -pub unsafe fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27115,9 +28663,11 @@ pub unsafe fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let xor = _mm_xor_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, xor, src.as_i32x4())) +pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, xor, src.as_i32x4())) + } } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27127,9 +28677,11 @@ pub unsafe fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let xor = _mm_xor_epi32(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, xor, i32x4::ZERO)) +pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi32(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, xor, i32x4::ZERO)) + } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. 
@@ -27139,8 +28691,8 @@ pub unsafe fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27150,9 +28702,11 @@ pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, xor, src.as_i64x8())) +pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, xor, src.as_i64x8())) + } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27162,9 +28716,11 @@ pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, xor, i64x8::ZERO)) +pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let xor = _mm512_xor_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, xor, i64x8::ZERO)) + } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. @@ -27174,8 +28730,8 @@ pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxor))] //should be vpxorq -pub unsafe fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -27185,9 +28741,11 @@ pub unsafe fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let xor = _mm256_xor_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, xor, src.as_i64x4())) +pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, xor, src.as_i64x4())) + } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27197,9 +28755,11 @@ pub unsafe fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let xor = _mm256_xor_epi64(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, xor, i64x4::ZERO)) +pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let xor = _mm256_xor_epi64(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, xor, i64x4::ZERO)) + } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. @@ -27209,8 +28769,8 @@ pub unsafe fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vxor))] //should be vpxorq -pub unsafe fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27220,9 +28780,11 @@ pub unsafe fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let xor = _mm_xor_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, xor, src.as_i64x2())) +pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, xor, src.as_i64x2())) + } } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -27232,9 +28794,11 @@ pub unsafe fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let xor = _mm_xor_epi64(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, xor, i64x2::ZERO)) +pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let xor = _mm_xor_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, xor, i64x2::ZERO)) + } } /// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst. @@ -27244,8 +28808,8 @@ pub unsafe fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) } } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst. @@ -27255,7 +28819,7 @@ pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd -pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i { _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b) } @@ -27266,14 +28830,11 @@ pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnd))] -pub unsafe fn _mm512_mask_andnot_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, andnot, src.as_i32x16())) +pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, andnot, src.as_i32x16())) + } } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27283,9 +28844,11 @@ pub unsafe fn _mm512_mask_andnot_epi32( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnd))] -pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, andnot, i32x16::ZERO)) +pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, andnot, i32x16::ZERO)) + } } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
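// Illustrative sketch (not part of the patch): the andnot forms above are
// lowered as an XOR with all-ones (a bitwise NOT) followed by an AND, so each
// 32-bit lane computes `!a & b`. A scalar model of that identity:
fn andnot_lane(a: u32, b: u32) -> u32 {
    (a ^ u32::MAX) & b
}

#[test]
fn andnot_lane_matches_not_and() {
    assert_eq!(andnot_lane(0b1100, 0b1010), 0b0010);
    assert_eq!(andnot_lane(0b1100, 0b1010), !0b1100u32 & 0b1010);
}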
@@ -27295,15 +28858,12 @@ pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnd))] -pub unsafe fn _mm256_mask_andnot_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); - let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); - transmute(simd_select_bitmask(k, andnot, src.as_i32x8())) +pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, andnot, src.as_i32x8())) + } } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27313,10 +28873,12 @@ pub unsafe fn _mm256_mask_andnot_epi32( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnd))] -pub unsafe fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); - let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); - transmute(simd_select_bitmask(k, andnot, i32x8::ZERO)) +pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); + transmute(simd_select_bitmask(k, andnot, i32x8::ZERO)) + } } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27326,10 +28888,12 @@ pub unsafe fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnd))] -pub unsafe fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); - let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); - transmute(simd_select_bitmask(k, andnot, src.as_i32x4())) +pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, andnot, src.as_i32x4())) + } } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -27339,10 +28903,12 @@ pub unsafe fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnd))] -pub unsafe fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); - let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); - transmute(simd_select_bitmask(k, andnot, i32x4::ZERO)) +pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); + let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); + transmute(simd_select_bitmask(k, andnot, i32x4::ZERO)) + } } /// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst. @@ -27352,7 +28918,7 @@ pub unsafe fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd -pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i { _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b) } @@ -27363,14 +28929,11 @@ pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm512_mask_andnot_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - let andnot = _mm512_andnot_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, andnot, src.as_i64x8())) +pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let andnot = _mm512_andnot_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, andnot, src.as_i64x8())) + } } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27380,9 +28943,11 @@ pub unsafe fn _mm512_mask_andnot_epi64( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let andnot = _mm512_andnot_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, andnot, i64x8::ZERO)) +pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let andnot = _mm512_andnot_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, andnot, i64x8::ZERO)) + } } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -27392,15 +28957,12 @@ pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm256_mask_andnot_epi64( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64)); - let andnot = simd_and(not.as_i64x4(), b.as_i64x4()); - transmute(simd_select_bitmask(k, andnot, src.as_i64x4())) +pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64)); + let andnot = simd_and(not.as_i64x4(), b.as_i64x4()); + transmute(simd_select_bitmask(k, andnot, src.as_i64x4())) + } } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -27410,10 +28972,12 @@ pub unsafe fn _mm256_mask_andnot_epi64( #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { - let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64)); - let andnot = simd_and(not.as_i64x4(), b.as_i64x4()); - transmute(simd_select_bitmask(k, andnot, i64x4::ZERO)) +pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64)); + let andnot = simd_and(not.as_i64x4(), b.as_i64x4()); + transmute(simd_select_bitmask(k, andnot, i64x4::ZERO)) + } } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27423,10 +28987,12 @@ pub unsafe fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64)); - let andnot = simd_and(not.as_i64x2(), b.as_i64x2()); - transmute(simd_select_bitmask(k, andnot, src.as_i64x2())) +pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64)); + let andnot = simd_and(not.as_i64x2(), b.as_i64x2()); + transmute(simd_select_bitmask(k, andnot, src.as_i64x2())) + } } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -27436,10 +29002,12 @@ pub unsafe fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __ #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64)); - let andnot = simd_and(not.as_i64x2(), b.as_i64x2()); - transmute(simd_select_bitmask(k, andnot, i64x2::ZERO)) +pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64)); + let andnot = simd_and(not.as_i64x2(), b.as_i64x2()); + transmute(simd_select_bitmask(k, andnot, i64x2::ZERO)) + } } /// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst. @@ -27449,7 +29017,7 @@ pub unsafe fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpandnq))] -pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i { _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b) } @@ -27459,7 +29027,7 @@ pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtmask16_u32(a: __mmask16) -> u32 { +pub fn _cvtmask16_u32(a: __mmask16) -> u32 { a as u32 } @@ -27469,7 +29037,7 @@ pub unsafe fn _cvtmask16_u32(a: __mmask16) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtu32_mask16(a: u32) -> __mmask16 { +pub fn _cvtu32_mask16(a: u32) -> __mmask16 { a as __mmask16 } @@ -27480,7 +29048,7 @@ pub unsafe fn _cvtu32_mask16(a: u32) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw -pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { a & b } @@ -27491,7 +29059,7 @@ pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw -pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { a & b } @@ -27502,7 +29070,7 @@ pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw -pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { a | b } @@ -27513,7 +29081,7 @@ pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw -pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) 
-> __mmask16 { +pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { a | b } @@ -27524,7 +29092,7 @@ pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw -pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { a ^ b } @@ -27535,7 +29103,7 @@ pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw -pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { a ^ b } @@ -27545,7 +29113,7 @@ pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 { +pub fn _knot_mask16(a: __mmask16) -> __mmask16 { a ^ 0b11111111_11111111 } @@ -27555,7 +29123,7 @@ pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 { +pub fn _mm512_knot(a: __mmask16) -> __mmask16 { a ^ 0b11111111_11111111 } @@ -27566,7 +29134,7 @@ pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw -pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { _mm512_kand(_mm512_knot(a), b) } @@ -27577,7 +29145,7 @@ pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandw -pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 { _mm512_kand(_mm512_knot(a), b) } @@ -27588,7 +29156,7 @@ pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw -pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { _mm512_knot(_mm512_kxor(a, b)) } @@ -27599,7 +29167,7 @@ pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kandw -pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 { _mm512_knot(_mm512_kxor(a, b)) } @@ -27623,7 +29191,7 @@ pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", 
issue = "111137")] -pub unsafe fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { +pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { (_kor_mask16(a, b) == 0xffff) as u8 } @@ -27634,7 +29202,7 @@ pub unsafe fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { +pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { (_kor_mask16(a, b) == 0) as u8 } @@ -27645,7 +29213,7 @@ pub unsafe fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 { #[target_feature(enable = "avx512f")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftli_mask16(a: __mmask16) -> __mmask16 { +pub fn _kshiftli_mask16(a: __mmask16) -> __mmask16 { a << COUNT } @@ -27656,7 +29224,7 @@ pub unsafe fn _kshiftli_mask16(a: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _kshiftri_mask16(a: __mmask16) -> __mmask16 { +pub fn _kshiftri_mask16(a: __mmask16) -> __mmask16 { a >> COUNT } @@ -27687,7 +29255,7 @@ pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw -pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 { +pub fn _mm512_kmov(a: __mmask16) -> __mmask16 { a } @@ -27697,7 +29265,7 @@ pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 { #[inline] #[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 { +pub fn _mm512_int2mask(mask: i32) -> __mmask16 { mask as u16 } @@ -27708,7 +29276,7 @@ pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw -pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 { +pub fn _mm512_mask2int(k1: __mmask16) -> i32 { k1 as i32 } @@ -27719,7 +29287,7 @@ pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw -pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 { +pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 { ((a & 0xff) << 8) | (b & 0xff) } @@ -27730,7 +29298,7 @@ pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw -pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 { +pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 { let r = (a | b) == 0b11111111_11111111; r as i32 } @@ -27742,7 +29310,7 @@ pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of 
kortestw -pub unsafe fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 { +pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 { let r = (a | b) == 0; r as i32 } @@ -27754,7 +29322,7 @@ pub unsafe fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmd))] -pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { let and = _mm512_and_epi32(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpneq_epi32_mask(and, zero) @@ -27767,7 +29335,7 @@ pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmd))] -pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { let and = _mm512_and_epi32(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpneq_epi32_mask(k, and, zero) @@ -27780,7 +29348,7 @@ pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmd))] -pub unsafe fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpneq_epi32_mask(and, zero) @@ -27793,7 +29361,7 @@ pub unsafe fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmd))] -pub unsafe fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpneq_epi32_mask(k, and, zero) @@ -27806,7 +29374,7 @@ pub unsafe fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmd))] -pub unsafe fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpneq_epi32_mask(and, zero) @@ -27819,7 +29387,7 @@ pub unsafe fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmd))] -pub unsafe fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpneq_epi32_mask(k, and, zero) @@ -27832,7 +29400,7 @@ pub unsafe fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmq))] -pub unsafe fn _mm512_test_epi64_mask(a: 
__m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { let and = _mm512_and_epi64(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpneq_epi64_mask(and, zero) @@ -27845,7 +29413,7 @@ pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmq))] -pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { let and = _mm512_and_epi64(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpneq_epi64_mask(k, and, zero) @@ -27858,7 +29426,7 @@ pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmq))] -pub unsafe fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpneq_epi64_mask(and, zero) @@ -27871,7 +29439,7 @@ pub unsafe fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmq))] -pub unsafe fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpneq_epi64_mask(k, and, zero) @@ -27884,7 +29452,7 @@ pub unsafe fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) - #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmq))] -pub unsafe fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpneq_epi64_mask(and, zero) @@ -27897,7 +29465,7 @@ pub unsafe fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestmq))] -pub unsafe fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpneq_epi64_mask(k, and, zero) @@ -27910,7 +29478,7 @@ pub unsafe fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> _ #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmd))] -pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { let and = _mm512_and_epi32(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpeq_epi32_mask(and, zero) @@ -27923,7 +29491,7 @@ pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, 
assert_instr(vptestnmd))] -pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { let and = _mm512_and_epi32(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpeq_epi32_mask(k, and, zero) @@ -27936,7 +29504,7 @@ pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmd))] -pub unsafe fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpeq_epi32_mask(and, zero) @@ -27949,7 +29517,7 @@ pub unsafe fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmd))] -pub unsafe fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpeq_epi32_mask(k, and, zero) @@ -27962,7 +29530,7 @@ pub unsafe fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmd))] -pub unsafe fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpeq_epi32_mask(and, zero) @@ -27975,7 +29543,7 @@ pub unsafe fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmd))] -pub unsafe fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpeq_epi32_mask(k, and, zero) @@ -27988,7 +29556,7 @@ pub unsafe fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmq))] -pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { let and = _mm512_and_epi64(a, b); let zero = _mm512_setzero_si512(); _mm512_cmpeq_epi64_mask(and, zero) @@ -28001,7 +29569,7 @@ pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmq))] -pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { let and = _mm512_and_epi64(a, b); let zero = _mm512_setzero_si512(); _mm512_mask_cmpeq_epi64_mask(k, and, zero) @@ -28014,7 +29582,7 @@ pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) 
#[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmq))] -pub unsafe fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_cmpeq_epi64_mask(and, zero) @@ -28027,7 +29595,7 @@ pub unsafe fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmq))] -pub unsafe fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { let and = _mm256_and_si256(a, b); let zero = _mm256_setzero_si256(); _mm256_mask_cmpeq_epi64_mask(k, and, zero) @@ -28040,7 +29608,7 @@ pub unsafe fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmq))] -pub unsafe fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_cmpeq_epi64_mask(and, zero) @@ -28053,7 +29621,7 @@ pub unsafe fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vptestnmq))] -pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { let and = _mm_and_si128(a, b); let zero = _mm_setzero_si128(); _mm_mask_cmpeq_epi64_mask(k, and, zero) @@ -28162,7 +29730,7 @@ pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set_ps( +pub fn _mm512_set_ps( e0: f32, e1: f32, e2: f32, @@ -28192,7 +29760,7 @@ pub unsafe fn _mm512_set_ps( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr_ps( +pub fn _mm512_setr_ps( e0: f32, e1: f32, e2: f32, @@ -28210,10 +29778,12 @@ pub unsafe fn _mm512_setr_ps( e14: f32, e15: f32, ) -> __m512 { - let r = f32x16::new( - e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, - ); - transmute(r) + unsafe { + let r = f32x16::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ); + transmute(r) + } } /// Broadcast 64-bit float `a` to all elements of `dst`. @@ -28222,8 +29792,8 @@ pub unsafe fn _mm512_setr_ps( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d { - transmute(f64x8::splat(a)) +pub fn _mm512_set1_pd(a: f64) -> __m512d { + unsafe { transmute(f64x8::splat(a)) } } /// Broadcast 32-bit float `a` to all elements of `dst`. 
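// Usage sketch (assumed caller code, not taken from this patch): with the kmask
// helpers above now safe, 16-bit mask arithmetic reads like ordinary integer bit
// twiddling inside a function that itself enables avx512f. Assumes an x86_64
// target and a nightly toolchain, since the items above are still
// #[unstable(feature = "stdarch_x86_avx512")]; the names `mask_demo` and the
// literals below are illustrative only.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

// No `unsafe` needed for the intrinsic calls: this function enables the same
// target feature the intrinsics require.
#[target_feature(enable = "avx512f")]
fn mask_demo(a: __mmask16, b: __mmask16) -> (u32, u8) {
    let both = _kand_mask16(a, b);                     // a & b
    let merged = _kor_mask16(both, _knot_mask16(b));   // (a & b) | !b
    let is_zero = _kortestz_mask16_u8(merged, merged); // 1 iff the OR is all zeros
    (_cvtmask16_u32(merged), is_zero)
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        // Entering a #[target_feature] function from plain code still takes
        // `unsafe`: the caller vouches that avx512f is actually available.
        let (mask, is_zero) = unsafe { mask_demo(0xaaaa, 0xff00) };
        println!("merged mask = {mask:#06x}, all-zero flag = {is_zero}");
    }
}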
@@ -28232,8 +29802,8 @@ pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 { - transmute(f32x16::splat(a)) +pub fn _mm512_set1_ps(a: f32) -> __m512 { + unsafe { transmute(f32x16::splat(a)) } } /// Sets packed 32-bit integers in `dst` with the supplied values. @@ -28242,7 +29812,7 @@ pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set_epi32( +pub fn _mm512_set_epi32( e15: i32, e14: i32, e13: i32, @@ -28271,8 +29841,8 @@ pub unsafe fn _mm512_set_epi32( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i { - transmute(i8x64::splat(a)) +pub fn _mm512_set1_epi8(a: i8) -> __m512i { + unsafe { transmute(i8x64::splat(a)) } } /// Broadcast the low packed 16-bit integer from a to all elements of dst. @@ -28281,8 +29851,8 @@ pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i { - transmute(i16x32::splat(a)) +pub fn _mm512_set1_epi16(a: i16) -> __m512i { + unsafe { transmute(i16x32::splat(a)) } } /// Broadcast 32-bit integer `a` to all elements of `dst`. @@ -28291,8 +29861,8 @@ pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i { - transmute(i32x16::splat(a)) +pub fn _mm512_set1_epi32(a: i32) -> __m512i { + unsafe { transmute(i32x16::splat(a)) } } /// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -28302,9 +29872,11 @@ pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastd))] -pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i { - let r = _mm512_set1_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i { + unsafe { + let r = _mm512_set1_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -28314,9 +29886,11 @@ pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m5 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastd))] -pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i { - let r = _mm512_set1_epi32(a).as_i32x16(); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i { + unsafe { + let r = _mm512_set1_epi32(a).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
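// Semantics sketch (assumed caller code, not taken from this patch): the mask
// and maskz broadcast forms differ only in what unselected lanes hold. Assumes
// nightly with #![feature(stdarch_x86_avx512)] and an x86_64 target; call it
// like the earlier `mask_demo` sketch (runtime-detect avx512f, then `unsafe`).
// The name `broadcast_semantics` is illustrative only.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn broadcast_semantics() -> bool {
    let src = _mm512_set1_epi32(7);
    let k: __mmask16 = 0b0000_0000_1111_0000;
    let merged = _mm512_mask_set1_epi32(src, k, 42); // 42 where k is set, else keeps 7
    let zeroed = _mm512_maskz_set1_epi32(k, 42);     // 42 where k is set, else 0
    let forty_two = _mm512_set1_epi32(42);
    // Check every lane with full-width equality compares.
    _mm512_cmpeq_epi32_mask(merged, forty_two) == k
        && _mm512_cmpeq_epi32_mask(merged, src) == !k
        && _mm512_cmpeq_epi32_mask(zeroed, forty_two) == k
        && _mm512_cmpeq_epi32_mask(zeroed, _mm512_setzero_si512()) == !k
}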
@@ -28326,9 +29900,11 @@ pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastd))] -pub unsafe fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i { - let r = _mm256_set1_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i { + unsafe { + let r = _mm256_set1_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -28338,9 +29914,11 @@ pub unsafe fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastd))] -pub unsafe fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i { - let r = _mm256_set1_epi32(a).as_i32x8(); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i { + unsafe { + let r = _mm256_set1_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -28350,9 +29928,11 @@ pub unsafe fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastd))] -pub unsafe fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i { - let r = _mm_set1_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i { + unsafe { + let r = _mm_set1_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -28362,9 +29942,11 @@ pub unsafe fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastd))] -pub unsafe fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i { - let r = _mm_set1_epi32(a).as_i32x4(); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i { + unsafe { + let r = _mm_set1_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Broadcast 64-bit integer `a` to all elements of `dst`. @@ -28373,8 +29955,8 @@ pub unsafe fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { - transmute(i64x8::splat(a)) +pub fn _mm512_set1_epi64(a: i64) -> __m512i { + unsafe { transmute(i64x8::splat(a)) } } /// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -28384,9 +29966,11 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastq))] -pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i { - let r = _mm512_set1_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, r, src.as_i64x8())) +pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i { + unsafe { + let r = _mm512_set1_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, r, src.as_i64x8())) + } } /// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -28396,9 +29980,11 @@ pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m51 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastq))] -pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i { - let r = _mm512_set1_epi64(a).as_i64x8(); - transmute(simd_select_bitmask(k, r, i64x8::ZERO)) +pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i { + unsafe { + let r = _mm512_set1_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) + } } /// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -28408,9 +29994,11 @@ pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastq))] -pub unsafe fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i { - let r = _mm256_set1_epi64x(a).as_i64x4(); - transmute(simd_select_bitmask(k, r, src.as_i64x4())) +pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i { + unsafe { + let r = _mm256_set1_epi64x(a).as_i64x4(); + transmute(simd_select_bitmask(k, r, src.as_i64x4())) + } } /// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -28420,9 +30008,11 @@ pub unsafe fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m25 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastq))] -pub unsafe fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i { - let r = _mm256_set1_epi64x(a).as_i64x4(); - transmute(simd_select_bitmask(k, r, i64x4::ZERO)) +pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i { + unsafe { + let r = _mm256_set1_epi64x(a).as_i64x4(); + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) + } } /// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
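// Sketch for the narrower widths (assumed caller code, not taken from this
// patch): the 128/256-bit mask_set1/maskz_set1 forms additionally require
// AVX-512VL, so the caller enables both features; the body still needs no
// `unsafe`. Nightly + #![feature(stdarch_x86_avx512)] and an x86_64 target
// assumed; the name `vl_broadcast` is illustrative only.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn vl_broadcast() -> bool {
    let k: __mmask8 = 0b0000_0101; // only the low 4 bits matter for 4 x i64
    let v = _mm256_maskz_set1_epi64(k, -1); // lanes 0 and 2 become -1, lanes 1 and 3 are zeroed
    // The equality mask likewise only uses its low 4 bits.
    _mm256_cmpeq_epi64_mask(v, _mm256_set1_epi64x(-1)) == k
}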
@@ -28432,9 +30022,11 @@ pub unsafe fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastq))] -pub unsafe fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i { - let r = _mm_set1_epi64x(a).as_i64x2(); - transmute(simd_select_bitmask(k, r, src.as_i64x2())) +pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i { + unsafe { + let r = _mm_set1_epi64x(a).as_i64x2(); + transmute(simd_select_bitmask(k, r, src.as_i64x2())) + } } /// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -28444,9 +30036,11 @@ pub unsafe fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpbroadcastq))] -pub unsafe fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i { - let r = _mm_set1_epi64x(a).as_i64x2(); - transmute(simd_select_bitmask(k, r, i64x2::ZERO)) +pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i { + unsafe { + let r = _mm_set1_epi64x(a).as_i64x2(); + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) + } } /// Set packed 64-bit integers in dst with the repeated 4 element sequence. @@ -28455,7 +30049,7 @@ pub unsafe fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { +pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { _mm512_set_epi64(d, c, b, a, d, c, b, a) } @@ -28465,7 +30059,7 @@ pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { +pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { _mm512_set_epi64(a, b, c, d, a, b, c, d) } @@ -28476,7 +30070,7 @@ pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b) } @@ -28487,7 +30081,7 @@ pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b) } @@ -28498,7 +30092,7 @@ pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 
{ _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b) } @@ -28509,7 +30103,7 @@ pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b) } @@ -28520,7 +30114,7 @@ pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b) } @@ -28531,7 +30125,7 @@ pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b) } @@ -28542,7 +30136,7 @@ pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b) } @@ -28553,7 +30147,7 @@ pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b) } @@ -28564,7 +30158,7 @@ pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b) } @@ -28575,7 +30169,7 @@ pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b) } @@ -28586,7 +30180,7 @@ pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] 
//should be vcmpps -pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b) } @@ -28597,7 +30191,7 @@ pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b) } @@ -28609,13 +30203,15 @@ pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm512_cmp_ps_mask(a: __m512, b: __m512) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -28626,16 +30222,14 @@ pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512) -> __mma #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm512_mask_cmp_ps_mask( - k1: __mmask16, - a: __m512, - b: __m512, -) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 5); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm512_mask_cmp_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
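// Equivalence sketch (assumed caller code, not taken from this patch): the
// named predicates above are thin wrappers over the const-generic form, e.g.
// _mm512_cmplt_ps_mask(a, b) is _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b), and
// both are now callable without `unsafe` from an avx512f context. Nightly +
// #![feature(stdarch_x86_avx512)] and an x86_64 target assumed; `lt_two_ways`
// is an illustrative name.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn lt_two_ways(a: __m512, b: __m512) -> bool {
    // One bit per f32 lane; bit i is set when a[i] < b[i] (ordered, signaling).
    let shorthand = _mm512_cmplt_ps_mask(a, b);
    let generic = _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b);
    shorthand == generic // always true: same predicate either way
}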
@@ -28646,13 +30240,15 @@ pub unsafe fn _mm512_mask_cmp_ps_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm256_cmp_ps_mask(a: __m256, b: __m256) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let r = vcmpps256(a, b, IMM8, neg_one); - r.cast_unsigned() +pub fn _mm256_cmp_ps_mask(a: __m256, b: __m256) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let r = vcmpps256(a, b, IMM8, neg_one); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -28663,16 +30259,14 @@ pub unsafe fn _mm256_cmp_ps_mask(a: __m256, b: __m256) -> __mma #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm256_mask_cmp_ps_mask( - k1: __mmask8, - a: __m256, - b: __m256, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let a = a.as_f32x8(); - let b = b.as_f32x8(); - let r = vcmpps256(a, b, IMM8, k1 as i8); - r.cast_unsigned() +pub fn _mm256_mask_cmp_ps_mask(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f32x8(); + let b = b.as_f32x8(); + let r = vcmpps256(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -28683,13 +30277,15 @@ pub unsafe fn _mm256_mask_cmp_ps_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_ps_mask(a: __m128, b: __m128) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vcmpps128(a, b, IMM8, neg_one); - r.cast_unsigned() +pub fn _mm_cmp_ps_mask(a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vcmpps128(a, b, IMM8, neg_one); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
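// Masked-compare sketch (assumed caller code, not taken from this patch): in
// the *_mask_cmp_* forms the k1 argument pre-filters lanes, so the result is
// the comparison mask ANDed with k1; the 128/256-bit forms need AVX-512VL as
// well. Nightly + #![feature(stdarch_x86_avx512)] and an x86_64 target
// assumed; `masked_compare` is an illustrative name.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn masked_compare() -> bool {
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let b = _mm_set1_ps(2.5);
    let full = _mm_cmp_ps_mask::<_CMP_LT_OS>(a, b); // lanes 0 and 1 compare less-than
    let filtered = _mm_mask_cmp_ps_mask::<_CMP_LT_OS>(0b0001, a, b); // keep only lane 0
    full == 0b0011 && filtered == 0b0001
}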
@@ -28700,16 +30296,14 @@ pub unsafe fn _mm_cmp_ps_mask(a: __m128, b: __m128) -> __mmask8 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_ps_mask( - k1: __mmask8, - a: __m128, - b: __m128, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vcmpps128(a, b, IMM8, k1 as i8); - r.cast_unsigned() +pub fn _mm_mask_cmp_ps_mask(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vcmpps128(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\ @@ -28721,17 +30315,19 @@ pub unsafe fn _mm_mask_cmp_ps_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm512_cmp_round_ps_mask( +pub fn _mm512_cmp_round_ps_mask( a: __m512, b: __m512, ) -> __mmask16 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let neg_one = -1; - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vcmpps(a, b, IMM5, neg_one, SAE); - r.cast_unsigned() + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\ @@ -28743,17 +30339,19 @@ pub unsafe fn _mm512_cmp_round_ps_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_mask_cmp_round_ps_mask( +pub fn _mm512_mask_cmp_round_ps_mask( m: __mmask16, a: __m512, b: __m512, ) -> __mmask16 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x16(); - let b = b.as_f32x16(); - let r = vcmpps(a, b, IMM5, m as i16, SAE); - r.cast_unsigned() + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x16(); + let b = b.as_f32x16(); + let r = vcmpps(a, b, IMM5, m as i16, SAE); + r.cast_unsigned() + } } /// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k. 
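// SAE sketch (assumed caller code, not taken from this patch): the *_round_*
// compares take a second const parameter which, per the static_assert_mantissas_sae
// check above, is expected to be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
// It only controls floating-point exception reporting; the resulting mask
// matches the plain form. Nightly + #![feature(stdarch_x86_avx512)] and an
// x86_64 target assumed; `sae_compare` is an illustrative name.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn sae_compare(a: __m512, b: __m512) -> bool {
    let plain = _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b);
    let quiet = _mm512_cmp_round_ps_mask::<_CMP_LE_OS, _MM_FROUND_NO_EXC>(a, b);
    plain == quiet // identical masks; only FP exception flags can differ
}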
@@ -28763,7 +30361,7 @@ pub unsafe fn _mm512_mask_cmp_round_ps_mask( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmps -pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b) } @@ -28774,7 +30372,7 @@ pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b) } @@ -28785,7 +30383,7 @@ pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 { _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b) } @@ -28796,7 +30394,7 @@ pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps -pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { +pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 { _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b) } @@ -28807,7 +30405,7 @@ pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b) } @@ -28818,7 +30416,7 @@ pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b) } @@ -28829,7 +30427,7 @@ pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b) } @@ -28840,7 +30438,7 @@ pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 
{ +pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b) } @@ -28851,7 +30449,7 @@ pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b) } @@ -28862,7 +30460,7 @@ pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b) } @@ -28873,7 +30471,7 @@ pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b) } @@ -28884,7 +30482,7 @@ pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b) } @@ -28895,7 +30493,7 @@ pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b) } @@ -28906,7 +30504,7 @@ pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b) } @@ -28917,7 +30515,7 @@ pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b) } @@ -28928,7 +30526,7 @@ pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", 
issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b) } @@ -28940,13 +30538,15 @@ pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -28957,16 +30557,14 @@ pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm512_mask_cmp_pd_mask( - k1: __mmask8, - a: __m512d, - b: __m512d, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm512_mask_cmp_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -28977,13 +30575,15 @@ pub unsafe fn _mm512_mask_cmp_pd_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm256_cmp_pd_mask(a: __m256d, b: __m256d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let r = vcmppd256(a, b, IMM8, neg_one); - r.cast_unsigned() +pub fn _mm256_cmp_pd_mask(a: __m256d, b: __m256d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let r = vcmppd256(a, b, IMM8, neg_one); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -28994,16 +30594,14 @@ pub unsafe fn _mm256_cmp_pd_mask(a: __m256d, b: __m256d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm256_mask_cmp_pd_mask( - k1: __mmask8, - a: __m256d, - b: __m256d, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let a = a.as_f64x4(); - let b = b.as_f64x4(); - let r = vcmppd256(a, b, IMM8, k1 as i8); - r.cast_unsigned() +pub fn _mm256_mask_cmp_pd_mask(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f64x4(); + let b = b.as_f64x4(); + let r = vcmppd256(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -29014,13 +30612,15 @@ pub unsafe fn _mm256_mask_cmp_pd_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_pd_mask(a: __m128d, b: __m128d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vcmppd128(a, b, IMM8, neg_one); - r.cast_unsigned() +pub fn _mm_cmp_pd_mask(a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vcmppd128(a, b, IMM8, neg_one); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29031,16 +30631,14 @@ pub unsafe fn _mm_cmp_pd_mask(a: __m128d, b: __m128d) -> __mmas #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_pd_mask( - k1: __mmask8, - a: __m128d, - b: __m128d, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vcmppd128(a, b, IMM8, k1 as i8); - r.cast_unsigned() +pub fn _mm_mask_cmp_pd_mask(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vcmppd128(a, b, IMM8, k1 as i8); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\ @@ -29052,17 +30650,19 @@ pub unsafe fn _mm_mask_cmp_pd_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm512_cmp_round_pd_mask( +pub fn _mm512_cmp_round_pd_mask( a: __m512d, b: __m512d, ) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let neg_one = -1; - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vcmppd(a, b, IMM5, neg_one, SAE); - r.cast_unsigned() + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\ @@ -29074,17 +30674,19 @@ pub unsafe fn _mm512_cmp_round_pd_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm512_mask_cmp_round_pd_mask( +pub fn _mm512_mask_cmp_round_pd_mask( k1: __mmask8, a: __m512d, b: __m512d, ) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x8(); - let b = b.as_f64x8(); - let r = vcmppd(a, b, IMM5, k1 as i8, SAE); - r.cast_unsigned() + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x8(); + let b = b.as_f64x8(); + let r = vcmppd(a, b, IMM5, k1 as i8, SAE); + r.cast_unsigned() + } } /// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k. 
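// Combined sketch (assumed caller code, not taken from this patch): the masked
// round form layers both extras on the basic compare, i.e. the result is the
// comparison mask ANDed with k1, with FP exception reporting suppressed when
// _MM_FROUND_NO_EXC is passed. Nightly + #![feature(stdarch_x86_avx512)] and an
// x86_64 target assumed; `masked_sae_compare_pd` is an illustrative name.
#![feature(stdarch_x86_avx512)]

use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn masked_sae_compare_pd(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    // Passing k1 == 0xff reproduces the unmasked _mm512_cmp_round_pd_mask result.
    _mm512_mask_cmp_round_pd_mask::<_CMP_NLE_US, _MM_FROUND_NO_EXC>(k1, a, b)
}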
@@ -29094,7 +30696,7 @@ pub unsafe fn _mm512_mask_cmp_round_pd_mask( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b) } @@ -29105,7 +30707,7 @@ pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b) } @@ -29116,7 +30718,7 @@ pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b) } @@ -29127,7 +30729,7 @@ pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd -pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { +pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b) } @@ -29139,11 +30741,13 @@ pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
@@ -29154,14 +30758,12 @@ pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128) -> __mmask8 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_ss_mask<const IMM8: i32>( - k1: __mmask8, - a: __m128, - b: __m128, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\ @@ -29173,15 +30775,14 @@ pub unsafe fn _mm_mask_cmp_ss_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>( - a: __m128, - b: __m128, -) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let neg_one = -1; - let r = vcmpss(a, b, IMM5, neg_one, SAE); - r.cast_unsigned() +pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let r = vcmpss(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } } /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\ @@ -29193,15 +30794,17 @@ pub unsafe fn _mm_cmp_round_ss_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>( +pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>( k1: __mmask8, a: __m128, b: __m128, ) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let r = vcmpss(a, b, IMM5, k1 as i8, SAE); - r.cast_unsigned() + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let r = vcmpss(a, b, IMM5, k1 as i8, SAE); + r.cast_unsigned() + } } /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k. @@ -29212,11 +30815,13 @@ pub unsafe fn _mm_mask_cmp_round_ss_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let neg_one = -1; - let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let neg_one = -1; + let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
@@ -29227,14 +30832,12 @@ pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d) -> __mmas #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))] -pub unsafe fn _mm_mask_cmp_sd_mask<const IMM8: i32>( - k1: __mmask8, - a: __m128d, - b: __m128d, -) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 5); - let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); - r.cast_unsigned() +pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 5); + let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION); + r.cast_unsigned() + } } /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\ @@ -29246,15 +30849,14 @@ pub unsafe fn _mm_mask_cmp_sd_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>( - a: __m128d, - b: __m128d, -) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let neg_one = -1; - let r = vcmpsd(a, b, IMM5, neg_one, SAE); - r.cast_unsigned() +pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let neg_one = -1; + let r = vcmpsd(a, b, IMM5, neg_one, SAE); + r.cast_unsigned() + } } /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\ @@ -29266,15 +30868,17 @@ pub unsafe fn _mm_cmp_round_sd_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>( +pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>( k1: __mmask8, a: __m128d, b: __m128d, ) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let r = vcmpsd(a, b, IMM5, k1 as i8, SAE); - r.cast_unsigned() + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let r = vcmpsd(a, b, IMM5, k1 as i8, SAE); + r.cast_unsigned() + } } /// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k. @@ -29284,8 +30888,8 @@ pub unsafe fn _mm_mask_cmp_round_sd_mask( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) +pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) } } /// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
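// [Editor's note: illustrative sketch, not part of the patch.] The *_mask comparisons in the hunks
// above return one bit per lane, so the result is a plain integer mask (__mmask16 = u16) that can be
// inspected with ordinary integer operations. Hypothetical helper; assumes a nightly toolchain with
// `#![feature(stdarch_x86_avx512)]`.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn count_lanes_lt(a: __m512i, b: __m512i) -> u32 {
    // Bit i of the mask is set when unsigned lane i of `a` is less than lane i of `b`.
    _mm512_cmplt_epu32_mask(a, b).count_ones()
}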
@@ -29295,7 +30899,7 @@ pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -29306,8 +30910,8 @@ pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_lt(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_u32x8(), b.as_u32x8())) } } /// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29317,7 +30921,7 @@ pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -29328,8 +30932,8 @@ pub unsafe fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_lt(a.as_u32x4(), b.as_u32x4())) +pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_u32x4(), b.as_u32x4())) } } /// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29339,7 +30943,7 @@ pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -29350,8 +30954,8 @@ pub unsafe fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_gt(a.as_u32x16(), b.as_u32x16())) +pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_u32x16(), b.as_u32x16())) } } /// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29361,7 +30965,7 @@ pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -29372,8 +30976,8 @@ pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_gt(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_u32x8(), b.as_u32x8())) } } /// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29383,7 +30987,7 @@ pub unsafe fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -29394,8 +30998,8 @@ pub unsafe fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_gt(a.as_u32x4(), b.as_u32x4())) +pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_u32x4(), b.as_u32x4())) } } /// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29405,7 +31009,7 @@ pub unsafe fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -29416,8 +31020,8 @@ pub unsafe fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_le(a.as_u32x16(), b.as_u32x16())) +pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_u32x16(), b.as_u32x16())) } } /// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29427,7 +31031,7 @@ pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -29438,8 +31042,8 @@ pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_le(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_u32x8(), b.as_u32x8())) } } /// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29449,7 +31053,7 @@ pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -29460,8 +31064,8 @@ pub unsafe fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_le(a.as_u32x4(), b.as_u32x4())) +pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_u32x4(), b.as_u32x4())) } } /// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29471,7 +31075,7 @@ pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -29482,8 +31086,8 @@ pub unsafe fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_ge(a.as_u32x16(), b.as_u32x16())) +pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_u32x16(), b.as_u32x16())) } } /// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29493,7 +31097,7 @@ pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -29504,8 +31108,8 @@ pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_ge(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_u32x8(), b.as_u32x8())) } } /// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29515,7 +31119,7 @@ pub unsafe fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -29526,8 +31130,8 @@ pub unsafe fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ge(a.as_u32x4(), b.as_u32x4())) +pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_u32x4(), b.as_u32x4())) } } /// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29537,7 +31141,7 @@ pub unsafe fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -29548,8 +31152,8 @@ pub unsafe fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_eq(a.as_u32x16(), b.as_u32x16())) +pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_u32x16(), b.as_u32x16())) } } /// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29559,7 +31163,7 @@ pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -29570,8 +31174,8 @@ pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_eq(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_u32x8(), b.as_u32x8())) } } /// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29581,7 +31185,7 @@ pub unsafe fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -29592,8 +31196,8 @@ pub unsafe fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_eq(a.as_u32x4(), b.as_u32x4())) +pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_u32x4(), b.as_u32x4())) } } /// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29603,7 +31207,7 @@ pub unsafe fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -29614,8 +31218,8 @@ pub unsafe fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_ne(a.as_u32x16(), b.as_u32x16())) +pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_u32x16(), b.as_u32x16())) } } /// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29625,7 +31229,7 @@ pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -29636,8 +31240,8 @@ pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_ne(a.as_u32x8(), b.as_u32x8())) +pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_u32x8(), b.as_u32x8())) } } /// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29647,7 +31251,7 @@ pub unsafe fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -29658,8 +31262,8 @@ pub unsafe fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) +pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) } } /// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29669,7 +31273,7 @@ pub unsafe fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud -pub unsafe fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -29681,24 +31285,23 @@ pub unsafe fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( - a: __m512i, - b: __m512i, -) -> __mmask16 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u32x16(); - let b = b.as_u32x16(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i32x16::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i32x16::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u32x16(); + let b = b.as_u32x16(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x16::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -29709,26 +31312,28 @@ pub unsafe fn _mm512_cmp_epu32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( +pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( k1: __mmask16, a: __m512i, b: __m512i, ) -> __mmask16 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u32x16(); - let b = b.as_u32x16(); - let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x16::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u32x16(); + let b = b.as_u32x16(); + let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -29739,24 +31344,23 @@ pub unsafe fn _mm512_mask_cmp_epu32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( - a: __m256i, - b: __m256i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u32x8(); - let b = b.as_u32x8(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i32x8::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i32x8::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u32x8(); + let b = b.as_u32x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x8::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -29767,26 +31371,28 @@ pub unsafe fn _mm256_cmp_epu32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( +pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( k1: __mmask8, a: __m256i, b: __m256i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u32x8(); - let b = b.as_u32x8(); - let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x8::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u32x8(); + let b = b.as_u32x8(); + let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -29797,21 +31403,23 @@ pub unsafe fn _mm256_mask_cmp_epu32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u32x4(); - let b = b.as_u32x4(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i32x4::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i32x4::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u32x4(); + let b = b.as_u32x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x4::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -29822,26 +31430,28 @@ pub unsafe fn _mm_cmp_epu32_mask(a: __m128i, b: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( +pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>( k1: __mmask8, a: __m128i, b: __m128i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u32x4(); - let b = b.as_u32x4(); - let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x4::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u32x4(); + let b = b.as_u32x4(); + let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k. @@ -29851,8 +31461,8 @@ pub unsafe fn _mm_mask_cmp_epu32_mask( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) } } /// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29862,7 +31472,7 @@ pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -29873,8 +31483,8 @@ pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) } } /// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
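// [Editor's note: illustrative sketch, not part of the patch.] The _MM_CMPINT_* predicate passed
// through `IMM3` selects the comparison at compile time, so the generic form and the named wrappers
// in the hunks above compute identical masks, and the zeromask `k1` clears lanes in both.
// Hypothetical helper; assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]`.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
fn lt_masks_agree(k1: __mmask8, a: __m128i, b: __m128i) -> bool {
    let generic = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b);
    let named = _mm_mask_cmplt_epu32_mask(k1, a, b); // defined as the same call in this patch
    generic == named
}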
@@ -29884,7 +31494,7 @@ pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -29895,8 +31505,8 @@ pub unsafe fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_lt(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_lt(a.as_i32x4(), b.as_i32x4())) } } /// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29906,7 +31516,7 @@ pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -29917,8 +31527,8 @@ pub unsafe fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_gt(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_gt(a.as_i32x16(), b.as_i32x16())) } } /// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29928,7 +31538,7 @@ pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -29939,8 +31549,8 @@ pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_gt(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_i32x8(), b.as_i32x8())) } } /// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29950,7 +31560,7 @@ pub unsafe fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -29961,8 +31571,8 @@ pub unsafe fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_gt(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_gt(a.as_i32x4(), b.as_i32x4())) } } /// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -29972,7 +31582,7 @@ pub unsafe fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -29983,8 +31593,8 @@ pub unsafe fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_le(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_le(a.as_i32x16(), b.as_i32x16())) } } /// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -29994,7 +31604,7 @@ pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -30005,8 +31615,8 @@ pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_le(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_i32x8(), b.as_i32x8())) } } /// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30016,7 +31626,7 @@ pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -30027,8 +31637,8 @@ pub unsafe fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_le(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_le(a.as_i32x4(), b.as_i32x4())) } } /// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30038,7 +31648,7 @@ pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -30049,8 +31659,8 @@ pub unsafe fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_ge(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ge(a.as_i32x16(), b.as_i32x16())) } } /// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30060,7 +31670,7 @@ pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -30071,8 +31681,8 @@ pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_ge(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_i32x8(), b.as_i32x8())) } } /// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30082,7 +31692,7 @@ pub unsafe fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -30093,8 +31703,8 @@ pub unsafe fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ge(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ge(a.as_i32x4(), b.as_i32x4())) } } /// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30104,7 +31714,7 @@ pub unsafe fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -30115,8 +31725,8 @@ pub unsafe fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_eq(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_eq(a.as_i32x16(), b.as_i32x16())) } } /// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30126,7 +31736,7 @@ pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -30137,8 +31747,8 @@ pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_eq(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_i32x8(), b.as_i32x8())) } } /// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30148,7 +31758,7 @@ pub unsafe fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -30159,8 +31769,8 @@ pub unsafe fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_eq(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_eq(a.as_i32x4(), b.as_i32x4())) } } /// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30170,7 +31780,7 @@ pub unsafe fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -30181,8 +31791,8 @@ pub unsafe fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { - simd_bitmask::(simd_ne(a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { simd_bitmask::(simd_ne(a.as_i32x16(), b.as_i32x16())) } } /// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30192,7 +31802,7 @@ pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { +pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -30203,8 +31813,8 @@ pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::(simd_ne(a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_i32x8(), b.as_i32x8())) } } /// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30214,7 +31824,7 @@ pub unsafe fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -30225,8 +31835,8 @@ pub unsafe fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::(simd_ne(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::(simd_ne(a.as_i32x4(), b.as_i32x4())) } } /// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
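// Usage sketch (editor's illustration, not part of this patch): a hypothetical
// helper showing how the eq/neq mask intrinsics above read once they are safe
// fns. It assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]`
// and a caller that has verified AVX-512F support. Bit i of the returned
// `__mmask16` corresponds to lane i of the inputs.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn demo_cmpeq_epi32_mask() {
    use core::arch::x86_64::*;
    let a = _mm512_set1_epi32(7);
    let b = _mm512_setr_epi32(7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0);
    let eq: __mmask16 = _mm512_cmpeq_epi32_mask(a, b); // even lanes match
    let ne: __mmask16 = _mm512_cmpneq_epi32_mask(a, b);
    assert_eq!(eq, 0b0101_0101_0101_0101);
    assert_eq!(ne, !eq); // not-equal is the lane-wise complement
}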
@@ -30236,7 +31846,7 @@ pub unsafe fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd -pub unsafe fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -30248,24 +31858,23 @@ pub unsafe fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_cmp_epi32_mask( - a: __m512i, - b: __m512i, -) -> __mmask16 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i32x16::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i32x16::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x16::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x16::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30276,26 +31885,28 @@ pub unsafe fn _mm512_cmp_epi32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_mask_cmp_epi32_mask( +pub fn _mm512_mask_cmp_epi32_mask( k1: __mmask16, a: __m512i, b: __m512i, ) -> __mmask16 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i32x16(); - let b = b.as_i32x16(); - let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x16::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x16(); + let b = b.as_i32x16(); + let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x16::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
@@ -30306,24 +31917,23 @@ pub unsafe fn _mm512_mask_cmp_epi32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_cmp_epi32_mask( - a: __m256i, - b: __m256i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i32x8::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i32x8::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x8::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30334,26 +31944,28 @@ pub unsafe fn _mm256_cmp_epi32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_mask_cmp_epi32_mask( +pub fn _mm256_mask_cmp_epi32_mask( k1: __mmask8, a: __m256i, b: __m256i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i32x8(); - let b = b.as_i32x8(); - let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x8::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x8(); + let b = b.as_i32x8(); + let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
@@ -30364,21 +31976,23 @@ pub unsafe fn _mm256_mask_cmp_epi32_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_cmp_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i32x4::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i32x4::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i32x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i32x4::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30389,26 +32003,28 @@ pub unsafe fn _mm_cmp_epi32_mask(a: __m128i, b: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_mask_cmp_epi32_mask( +pub fn _mm_mask_cmp_epi32_mask( k1: __mmask8, a: __m128i, b: __m128i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i32x4(); - let b = b.as_i32x4(); - let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x4::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i32x4(); + let b = b.as_i32x4(); + let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i32x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k. @@ -30418,8 +32034,8 @@ pub unsafe fn _mm_mask_cmp_epi32_mask( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) +pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) } } /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
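// Usage sketch (editor's illustration, not part of this patch): the comparison
// predicate is a const generic, so callers pass one of the `_MM_CMPINT_*`
// constants via turbofish; in the `_mask_` variant, lanes whose bit is clear
// in the write-mask `k1` always come back as 0. Hypothetical demo, assuming
// avx512f+avx512vl and the nightly `stdarch_x86_avx512` gate.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn demo_cmp_epi32_with_predicate() {
    use core::arch::x86_64::*;
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let b = _mm_set1_epi32(2);
    // a <= b holds in lanes 0 and 1 only.
    assert_eq!(_mm_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b), 0b0011);
    // With k1 = 0b0001 the zeromask suppresses lane 1's result.
    assert_eq!(_mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(0b0001, a, b), 0b0001);
}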
@@ -30429,7 +32045,7 @@ pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -30440,8 +32056,8 @@ pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) +pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) } } /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30451,7 +32067,7 @@ pub unsafe fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -30462,8 +32078,8 @@ pub unsafe fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) +pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) } } /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
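// Usage sketch (editor's illustration, not part of this patch): the `epu64`
// comparisons are unsigned, so an all-ones bit pattern ranks as `u64::MAX`
// rather than -1. Hypothetical demo, assuming avx512f+avx512vl.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn demo_cmplt_epu64_is_unsigned() {
    use core::arch::x86_64::*;
    let a = _mm256_setr_epi64x(0, 1, u64::MAX as i64, 5);
    let b = _mm256_set1_epi64x(5);
    // Lanes 0 and 1 are below 5; lane 2 (u64::MAX) and lane 3 (5) are not.
    assert_eq!(_mm256_cmplt_epu64_mask(a, b), 0b0011);
}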
@@ -30473,7 +32089,7 @@ pub unsafe fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -30484,8 +32100,8 @@ pub unsafe fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) +pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) } } /// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30495,7 +32111,7 @@ pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -30506,8 +32122,8 @@ pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) +pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) } } /// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30517,7 +32133,7 @@ pub unsafe fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -30528,8 +32144,8 @@ pub unsafe fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) +pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) } } /// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30539,7 +32155,7 @@ pub unsafe fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -30550,8 +32166,8 @@ pub unsafe fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) +pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) } } /// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30561,7 +32177,7 @@ pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -30572,8 +32188,8 @@ pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) +pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) } } /// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30583,7 +32199,7 @@ pub unsafe fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -30594,8 +32210,8 @@ pub unsafe fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) +pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) } } /// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30605,7 +32221,7 @@ pub unsafe fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -30616,8 +32232,8 @@ pub unsafe fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) +pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) } } /// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30627,7 +32243,7 @@ pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -30638,8 +32254,8 @@ pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) +pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) } } /// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
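// Usage sketch (editor's illustration, not part of this patch): `__mmask8` is
// an ordinary `u8`, so results of the comparisons above compose with plain
// bitwise operators, e.g. a per-lane unsigned range check. `lanes_in_range`
// is a hypothetical helper, assuming avx512f.
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__m512i, __mmask8, _mm512_cmpge_epu64_mask, _mm512_cmple_epu64_mask};

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn lanes_in_range(x: __m512i, lo: __m512i, hi: __m512i) -> __mmask8 {
    // Bit i is set iff lo[i] <= x[i] <= hi[i] (unsigned).
    _mm512_cmpge_epu64_mask(x, lo) & _mm512_cmple_epu64_mask(x, hi)
}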
@@ -30649,7 +32265,7 @@ pub unsafe fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -30660,8 +32276,8 @@ pub unsafe fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) +pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) } } /// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30671,7 +32287,7 @@ pub unsafe fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -30682,8 +32298,8 @@ pub unsafe fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) +pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) } } /// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30693,7 +32309,7 @@ pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -30704,8 +32320,8 @@ pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub unsafe fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
-    simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4()))
+pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30715,7 +32331,7 @@ pub unsafe fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub unsafe fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -30726,8 +32342,8 @@ pub unsafe fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub unsafe fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
-    simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2()))
+pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30737,7 +32353,7 @@ pub unsafe fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -30748,8 +32364,8 @@ pub unsafe fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) +pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) } } /// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30759,7 +32375,7 @@ pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -30770,8 +32386,8 @@ pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) +pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) } } /// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30781,7 +32397,7 @@ pub unsafe fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -30792,8 +32408,8 @@ pub unsafe fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) +pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) } } /// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30803,7 +32419,7 @@ pub unsafe fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq -pub unsafe fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -30815,24 +32431,23 @@ pub unsafe fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_cmp_epu64_mask( - a: __m512i, - b: __m512i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u64x8(); - let b = b.as_u64x8(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i64x8::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i64x8::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x8::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30843,26 +32458,28 @@ pub unsafe fn _mm512_cmp_epu64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_mask_cmp_epu64_mask( +pub fn _mm512_mask_cmp_epu64_mask( k1: __mmask8, a: __m512i, b: __m512i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u64x8(); - let b = b.as_u64x8(); - let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x8::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -30873,24 +32490,23 @@ pub unsafe fn _mm512_mask_cmp_epu64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_cmp_epu64_mask( - a: __m256i, - b: __m256i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u64x4(); - let b = b.as_u64x4(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i64x4::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i64x4::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x4(); + let b = b.as_u64x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x4::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30901,26 +32517,28 @@ pub unsafe fn _mm256_cmp_epu64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_mask_cmp_epu64_mask( +pub fn _mm256_mask_cmp_epu64_mask( k1: __mmask8, a: __m256i, b: __m256i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u64x4(); - let b = b.as_u64x4(); - let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x4::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x4(); + let b = b.as_u64x4(); + let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. @@ -30931,21 +32549,23 @@ pub unsafe fn _mm256_mask_cmp_epu64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_cmp_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u64x2(); - let b = b.as_u64x2(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i64x2::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i64x2::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x2(); + let b = b.as_u64x2(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x2::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x2::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -30956,26 +32576,28 @@ pub unsafe fn _mm_cmp_epu64_mask(a: __m128i, b: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_mask_cmp_epu64_mask( +pub fn _mm_mask_cmp_epu64_mask( k1: __mmask8, a: __m128i, b: __m128i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_u64x2(); - let b = b.as_u64x2(); - let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x2::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_u64x2(); + let b = b.as_u64x2(); + let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x2::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k. @@ -30985,8 +32607,8 @@ pub unsafe fn _mm_mask_cmp_epu64_mask( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) } } /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -30996,7 +32618,7 @@ pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -31007,8 +32629,8 @@ pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) } } /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
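// Scalar reference model (editor's illustration, not stdarch code) of what the
// masked `_mm_mask_cmp_epu64_mask` above computes per lane: the predicate
// selected by IMM3 is ANDed with the corresponding write-mask bit, so masked-
// off lanes always report 0. `mask_cmp_epu64_model` is a hypothetical name.
fn mask_cmp_epu64_model<const IMM3: i32>(k1: u8, a: &[u64; 2], b: &[u64; 2]) -> u8 {
    let mut k = 0u8;
    for i in 0..2 {
        let p = match IMM3 {
            0 => a[i] == b[i], // _MM_CMPINT_EQ
            1 => a[i] < b[i],  // _MM_CMPINT_LT
            2 => a[i] <= b[i], // _MM_CMPINT_LE
            3 => false,        // always-false predicate
            4 => a[i] != b[i], // _MM_CMPINT_NE
            5 => a[i] >= b[i], // _MM_CMPINT_NLT
            6 => a[i] > b[i],  // _MM_CMPINT_NLE
            _ => true,         // always-true predicate
        };
        if p && (k1 >> i) & 1 == 1 {
            k |= 1 << i;
        }
    }
    k
}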
@@ -31018,7 +32640,7 @@ pub unsafe fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -31029,8 +32651,8 @@ pub unsafe fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) } } /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31040,7 +32662,7 @@ pub unsafe fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b) } @@ -31051,8 +32673,8 @@ pub unsafe fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) } } /// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31062,7 +32684,7 @@ pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -31073,8 +32695,8 @@ pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) } } /// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31084,7 +32706,7 @@ pub unsafe fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -31095,8 +32717,8 @@ pub unsafe fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) } } /// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
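// Usage sketch (editor's illustration, not part of this patch): the same bit
// pattern orders differently under the signed `epi64` comparisons above and
// the unsigned `epu64` comparisons earlier in this file. Hypothetical demo,
// assuming avx512f+avx512vl.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512vl")]
fn demo_signed_vs_unsigned_gt() {
    use core::arch::x86_64::*;
    let a = _mm_set1_epi64x(-1); // bit pattern of u64::MAX
    let b = _mm_set1_epi64x(1);
    assert_eq!(_mm_cmpgt_epi64_mask(a, b), 0b00); // signed: -1 > 1 is false
    assert_eq!(_mm_cmpgt_epu64_mask(a, b), 0b11); // unsigned: u64::MAX > 1
}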
@@ -31106,7 +32728,7 @@ pub unsafe fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b) } @@ -31117,8 +32739,8 @@ pub unsafe fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) } } /// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31128,7 +32750,7 @@ pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -31139,8 +32761,8 @@ pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) } } /// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31150,7 +32772,7 @@ pub unsafe fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -31161,8 +32783,8 @@ pub unsafe fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) } } /// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31172,7 +32794,7 @@ pub unsafe fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b) } @@ -31183,8 +32805,8 @@ pub unsafe fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) } } /// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31194,7 +32816,7 @@ pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -31205,8 +32827,8 @@ pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) } } /// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31216,7 +32838,7 @@ pub unsafe fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -31227,8 +32849,8 @@ pub unsafe fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) } } /// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31238,7 +32860,7 @@ pub unsafe fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b) } @@ -31249,8 +32871,8 @@ pub unsafe fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) } } /// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31260,7 +32882,7 @@ pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -31271,8 +32893,8 @@ pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) } } /// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31282,7 +32904,7 @@ pub unsafe fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -31293,8 +32915,8 @@ pub unsafe fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) } } /// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31304,7 +32926,7 @@ pub unsafe fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b) } @@ -31315,8 +32937,8 @@ pub unsafe fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) +pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) } } /// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31326,7 +32948,7 @@ pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { +pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -31337,8 +32959,8 @@ pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { - simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) +pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) } } /// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31348,7 +32970,7 @@ pub unsafe fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { +pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 { _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -31359,8 +32981,8 @@ pub unsafe fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) } } /// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). 
@@ -31370,7 +32992,7 @@ pub unsafe fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq -pub unsafe fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { +pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b) } @@ -31382,24 +33004,23 @@ pub unsafe fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_cmp_epi64_mask( - a: __m512i, - b: __m512i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i64x8(); - let b = b.as_i64x8(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i64x8::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i64x8::splat(-1), - }; - simd_bitmask(r) +pub fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x8::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x8::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31410,26 +33031,28 @@ pub unsafe fn _mm512_cmp_epi64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm512_mask_cmp_epi64_mask( +pub fn _mm512_mask_cmp_epi64_mask( k1: __mmask8, a: __m512i, b: __m512i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i64x8(); - let b = b.as_i64x8(); - let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x8::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x8(); + let b = b.as_i64x8(); + let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x8::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
@@ -31440,24 +33063,23 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_cmp_epi64_mask( - a: __m256i, - b: __m256i, -) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i64x4(); - let b = b.as_i64x4(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i64x4::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i64x4::splat(-1), - }; - simd_bitmask(r) +pub fn _mm256_cmp_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x4::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x4::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31468,26 +33090,28 @@ pub unsafe fn _mm256_cmp_epi64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm256_mask_cmp_epi64_mask( +pub fn _mm256_mask_cmp_epi64_mask( k1: __mmask8, a: __m256i, b: __m256i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i64x4(); - let b = b.as_i64x4(); - let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x4::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x4(); + let b = b.as_i64x4(); + let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x4::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k. 
@@ -31498,21 +33122,23 @@ pub unsafe fn _mm256_mask_cmp_epi64_mask( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_cmp_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i64x2(); - let b = b.as_i64x2(); - let r = match IMM3 { - 0 => simd_eq(a, b), - 1 => simd_lt(a, b), - 2 => simd_le(a, b), - 3 => i64x2::ZERO, - 4 => simd_ne(a, b), - 5 => simd_ge(a, b), - 6 => simd_gt(a, b), - _ => i64x2::splat(-1), - }; - simd_bitmask(r) +pub fn _mm_cmp_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let r = match IMM3 { + 0 => simd_eq(a, b), + 1 => simd_lt(a, b), + 2 => simd_le(a, b), + 3 => i64x2::ZERO, + 4 => simd_ne(a, b), + 5 => simd_ge(a, b), + 6 => simd_gt(a, b), + _ => i64x2::splat(-1), + }; + simd_bitmask(r) + } } /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). @@ -31523,26 +33149,28 @@ pub unsafe fn _mm_cmp_epi64_mask(a: __m128i, b: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[rustc_legacy_const_generics(3)] #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))] -pub unsafe fn _mm_mask_cmp_epi64_mask( +pub fn _mm_mask_cmp_epi64_mask( k1: __mmask8, a: __m128i, b: __m128i, ) -> __mmask8 { - static_assert_uimm_bits!(IMM3, 3); - let a = a.as_i64x2(); - let b = b.as_i64x2(); - let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); - let r = match IMM3 { - 0 => simd_and(k1, simd_eq(a, b)), - 1 => simd_and(k1, simd_lt(a, b)), - 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x2::ZERO, - 4 => simd_and(k1, simd_ne(a, b)), - 5 => simd_and(k1, simd_ge(a, b)), - 6 => simd_and(k1, simd_gt(a, b)), - _ => k1, - }; - simd_bitmask(r) + unsafe { + static_assert_uimm_bits!(IMM3, 3); + let a = a.as_i64x2(); + let b = b.as_i64x2(); + let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); + let r = match IMM3 { + 0 => simd_and(k1, simd_eq(a, b)), + 1 => simd_and(k1, simd_lt(a, b)), + 2 => simd_and(k1, simd_le(a, b)), + 3 => i64x2::ZERO, + 4 => simd_and(k1, simd_ne(a, b)), + 5 => simd_and(k1, simd_ge(a, b)), + 6 => simd_and(k1, simd_gt(a, b)), + _ => k1, + }; + simd_bitmask(r) + } } /// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a. @@ -31551,8 +33179,8 @@ pub unsafe fn _mm_mask_cmp_epi64_mask( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { - simd_reduce_add_unordered(a.as_i32x16()) +pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_add_unordered(a.as_i32x16()) } } /// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a. 
@@ -31561,8 +33189,8 @@ pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) +pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } } /// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a. @@ -31571,8 +33199,8 @@ pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { - simd_reduce_add_unordered(a.as_i64x8()) +pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_add_unordered(a.as_i64x8()) } } /// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -31581,8 +33209,8 @@ pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) +pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a. @@ -31591,15 +33219,17 @@ pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 { - // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ - let a = _mm256_add_ps( - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), - simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), - ); - let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); - let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); - simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1) +pub fn _mm512_reduce_add_ps(a: __m512) -> f32 { + unsafe { + // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ + let a = _mm256_add_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1) + } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a. 
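`_mm512_mask_reduce_add_epi32` and `_mm512_mask_reduce_add_epi64` above substitute the additive identity 0 for inactive lanes before the horizontal add, so masked-off elements simply do not contribute. A plain-Rust sketch of that behaviour (wrapping adds to mirror the unordered integer reduction; the name is mine, not the crate's):

// Scalar model of _mm512_mask_reduce_add_epi32: inactive lanes are replaced
// by the additive identity (0) before summing.
fn mask_reduce_add_epi32_model(k: u16, a: [i32; 16]) -> i32 {
    a.iter().enumerate().fold(0i32, |acc, (i, &x)| {
        let lane = if (k >> i) & 1 == 1 { x } else { 0 };
        acc.wrapping_add(lane)
    })
}

fn main() {
    let a = [1; 16];
    assert_eq!(mask_reduce_add_epi32_model(0xFFFF, a), 16); // all lanes active
    assert_eq!(mask_reduce_add_epi32_model(0x00FF, a), 8);  // lower half only
}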
@@ -31608,8 +33238,8 @@ pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 { - _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) +pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 { + unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) } } /// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a. @@ -31618,13 +33248,15 @@ pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 { - let a = _mm256_add_pd( - _mm512_extractf64x4_pd::<0>(a), - _mm512_extractf64x4_pd::<1>(a), - ); - let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); - simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1) +pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_add_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1) + } } /// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a. @@ -31633,8 +33265,8 @@ pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 { - _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) +pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 { + unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) } } /// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a. @@ -31643,8 +33275,8 @@ pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 { - simd_reduce_mul_unordered(a.as_i32x16()) +pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_mul_unordered(a.as_i32x16()) } } /// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -31653,12 +33285,14 @@ pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_mul_unordered(simd_select_bitmask( - k, - a.as_i32x16(), - _mm512_set1_epi32(1).as_i32x16(), - )) +pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { + simd_reduce_mul_unordered(simd_select_bitmask( + k, + a.as_i32x16(), + _mm512_set1_epi32(1).as_i32x16(), + )) + } } /// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a. 
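The floating-point reductions (`_mm512_reduce_add_ps` above, `_mm512_reduce_add_pd` here) combine lanes in a pairwise tree of shuffles and 256/128-bit adds rather than strictly left to right, so the result can differ from a sequential sum by rounding. A small illustration of that ordering effect (plain Rust; the tree below is only one possible association, not the exact lane order the intrinsic uses):

// Fold a slice as a balanced tree instead of left to right.
// Different association orders can round differently for f32.
fn tree_sum(a: &[f32]) -> f32 {
    if a.len() == 1 {
        return a[0];
    }
    let (lo, hi) = a.split_at(a.len() / 2);
    tree_sum(lo) + tree_sum(hi)
}

fn main() {
    let a = [
        1.0e8f32, 1.0, 1.0, 1.0, -1.0e8, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ];
    let sequential: f32 = a.iter().sum();
    let tree = tree_sum(&a);
    // With these inputs the two disagree (11.0 sequentially, 8.0 as a tree),
    // so callers should not expect bit-exact agreement with a scalar loop.
    println!("sequential = {sequential}, tree = {tree}");
}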
@@ -31667,8 +33301,8 @@ pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 { - simd_reduce_mul_unordered(a.as_i64x8()) +pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_mul_unordered(a.as_i64x8()) } } /// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -31677,12 +33311,14 @@ pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_mul_unordered(simd_select_bitmask( - k, - a.as_i64x8(), - _mm512_set1_epi64(1).as_i64x8(), - )) +pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { + simd_reduce_mul_unordered(simd_select_bitmask( + k, + a.as_i64x8(), + _mm512_set1_epi64(1).as_i64x8(), + )) + } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a. @@ -31691,15 +33327,17 @@ pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 { - // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ - let a = _mm256_mul_ps( - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), - simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), - ); - let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); - let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); - simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1) +pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 { + unsafe { + // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ + let a = _mm256_mul_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1) + } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a. @@ -31708,8 +33346,8 @@ pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 { - _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) +pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 { + unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) } } /// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a. 
@@ -31718,13 +33356,15 @@ pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 { - let a = _mm256_mul_pd( - _mm512_extractf64x4_pd::<0>(a), - _mm512_extractf64x4_pd::<1>(a), - ); - let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); - simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1) +pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_mul_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1) + } } /// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a. @@ -31733,8 +33373,8 @@ pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 { - _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) +pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 { + unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) } } /// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -31743,8 +33383,8 @@ pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 { - simd_reduce_max(a.as_i32x16()) +pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_max(a.as_i32x16()) } } /// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -31753,12 +33393,14 @@ pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_max(simd_select_bitmask( - k, - a.as_i32x16(), - i32x16::splat(i32::MIN), - )) +pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { + simd_reduce_max(simd_select_bitmask( + k, + a.as_i32x16(), + i32x16::splat(i32::MIN), + )) + } } /// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -31767,8 +33409,8 @@ pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 { - simd_reduce_max(a.as_i64x8()) +pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_max(a.as_i64x8()) } } /// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. 
@@ -31777,8 +33419,8 @@ pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) +pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) } } /// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -31787,8 +33429,8 @@ pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 { - simd_reduce_max(a.as_u32x16()) +pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 { + unsafe { simd_reduce_max(a.as_u32x16()) } } /// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -31797,8 +33439,8 @@ pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 { - simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) +pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) } } /// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -31807,8 +33449,8 @@ pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 { - simd_reduce_max(a.as_u64x8()) +pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 { + unsafe { simd_reduce_max(a.as_u64x8()) } } /// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -31817,8 +33459,8 @@ pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 { - simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) +pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 { + unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a. 
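For the masked max reductions the substituted identity is the smallest value of the element type: `i64::MIN` for the signed variant but plain 0 for the unsigned ones above. With an all-zero mask the reduction therefore returns that identity. Scalar models (names are mine):

// Inactive lanes become the smallest value of the element type, so they
// can never win the max.
fn mask_reduce_max_epi64_model(k: u8, a: [i64; 8]) -> i64 {
    a.iter()
        .enumerate()
        .map(|(i, &x)| if (k >> i) & 1 == 1 { x } else { i64::MIN })
        .max()
        .unwrap()
}

fn mask_reduce_max_epu64_model(k: u8, a: [u64; 8]) -> u64 {
    a.iter()
        .enumerate()
        .map(|(i, &x)| if (k >> i) & 1 == 1 { x } else { 0 })
        .max()
        .unwrap()
}

fn main() {
    assert_eq!(
        mask_reduce_max_epi64_model(0b0000_0011, [-7, -3, 100, 100, 100, 100, 100, 100]),
        -3
    );
    // With no lanes active the result is just the substituted identity.
    assert_eq!(mask_reduce_max_epi64_model(0, [1; 8]), i64::MIN);
    assert_eq!(mask_reduce_max_epu64_model(0, [1; 8]), 0);
}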
@@ -31827,14 +33469,16 @@ pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 { - let a = _mm256_max_ps( - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), - simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), - ); - let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); - let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); - _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a))) +pub fn _mm512_reduce_max_ps(a: __m512) -> f32 { + unsafe { + let a = _mm256_max_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a))) + } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -31843,7 +33487,7 @@ pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 { +pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 { _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a)) } @@ -31853,13 +33497,15 @@ pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 { - let a = _mm256_max_pd( - _mm512_extractf64x4_pd::<0>(a), - _mm512_extractf64x4_pd::<1>(a), - ); - let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); - _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0]))) +pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_max_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0]))) + } } /// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a. @@ -31868,7 +33514,7 @@ pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 { +pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 { _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a)) } @@ -31878,8 +33524,8 @@ pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 { - simd_reduce_min(a.as_i32x16()) +pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_min(a.as_i32x16()) } } /// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. 
@@ -31888,12 +33534,14 @@ pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_min(simd_select_bitmask( - k, - a.as_i32x16(), - i32x16::splat(i32::MAX), - )) +pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { + simd_reduce_min(simd_select_bitmask( + k, + a.as_i32x16(), + i32x16::splat(i32::MAX), + )) + } } /// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -31902,8 +33550,8 @@ pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 { - simd_reduce_min(a.as_i64x8()) +pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_min(a.as_i64x8()) } } /// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. @@ -31912,8 +33560,8 @@ pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) +pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) } } /// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -31922,8 +33570,8 @@ pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 { - simd_reduce_min(a.as_u32x16()) +pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 { + unsafe { simd_reduce_min(a.as_u32x16()) } } /// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. @@ -31932,12 +33580,14 @@ pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 { - simd_reduce_min(simd_select_bitmask( - k, - a.as_u32x16(), - u32x16::splat(u32::MAX), - )) +pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 { + unsafe { + simd_reduce_min(simd_select_bitmask( + k, + a.as_u32x16(), + u32x16::splat(u32::MAX), + )) + } } /// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -31946,8 +33596,8 @@ pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 { - simd_reduce_min(a.as_u64x8()) +pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 { + unsafe { simd_reduce_min(a.as_u64x8()) } } /// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a. 
@@ -31956,8 +33606,8 @@ pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 { - simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) +pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 { + unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a. @@ -31966,14 +33616,16 @@ pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 { - let a = _mm256_min_ps( - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), - simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), - ); - let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); - let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); - _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a))) +pub fn _mm512_reduce_min_ps(a: __m512) -> f32 { + unsafe { + let a = _mm256_min_ps( + simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]), + simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]), + ); + let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); + let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); + _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a))) + } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a. @@ -31982,7 +33634,7 @@ pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 { +pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 { _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a)) } @@ -31992,13 +33644,15 @@ pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 { - let a = _mm256_min_pd( - _mm512_extractf64x4_pd::<0>(a), - _mm512_extractf64x4_pd::<1>(a), - ); - let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); - _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0]))) +pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 { + unsafe { + let a = _mm256_min_pd( + _mm512_extractf64x4_pd::<0>(a), + _mm512_extractf64x4_pd::<1>(a), + ); + let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); + _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0]))) + } } /// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a. 
@@ -32007,7 +33661,7 @@ pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 { +pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 { _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a)) } @@ -32017,8 +33671,8 @@ pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 { - simd_reduce_and(a.as_i32x16()) +pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_and(a.as_i32x16()) } } /// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a. @@ -32027,8 +33681,8 @@ pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) +pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) } } /// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -32037,8 +33691,8 @@ pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 { - simd_reduce_and(a.as_i64x8()) +pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_and(a.as_i64x8()) } } /// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -32047,8 +33701,8 @@ pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) +pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) } } /// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -32057,8 +33711,8 @@ pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 { - simd_reduce_or(a.as_i32x16()) +pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 { + unsafe { simd_reduce_or(a.as_i32x16()) } } /// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. 
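The bitwise reductions follow the same identity-element scheme: the masked AND variants above substitute the all-ones pattern (`splat(-1)`), and the OR variants just below substitute 0, so inactive lanes never change the fold. A compact scalar sketch (model functions, not crate API):

// Masked bitwise reductions: AND uses -1 (all bits set) as the identity,
// OR uses 0, so masked-off lanes are no-ops for the fold.
fn mask_reduce_and_epi32_model(k: u16, a: [i32; 16]) -> i32 {
    a.iter()
        .enumerate()
        .map(|(i, &x)| if (k >> i) & 1 == 1 { x } else { -1 })
        .fold(-1, |acc, x| acc & x)
}

fn mask_reduce_or_epi32_model(k: u16, a: [i32; 16]) -> i32 {
    a.iter()
        .enumerate()
        .map(|(i, &x)| if (k >> i) & 1 == 1 { x } else { 0 })
        .fold(0, |acc, x| acc | x)
}

fn main() {
    let a = [0b1010; 16];
    assert_eq!(mask_reduce_and_epi32_model(0xFFFF, a), 0b1010);
    assert_eq!(mask_reduce_or_epi32_model(0x0001, a), 0b1010);
    // Empty mask: AND falls back to -1, OR to 0.
    assert_eq!(mask_reduce_and_epi32_model(0, a), -1);
    assert_eq!(mask_reduce_or_epi32_model(0, a), 0);
}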
@@ -32067,8 +33721,8 @@ pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) +pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } } /// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -32077,8 +33731,8 @@ pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 { - simd_reduce_or(a.as_i64x8()) +pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 { + unsafe { simd_reduce_or(a.as_i64x8()) } } /// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a. @@ -32087,8 +33741,8 @@ pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) +pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 { + unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } } /// Returns vector of type `__m512d` with indeterminate elements. @@ -32100,8 +33754,8 @@ pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm512_undefined_pd() -> __m512d { - const { mem::zeroed() } +pub fn _mm512_undefined_pd() -> __m512d { + unsafe { const { mem::zeroed() } } } /// Returns vector of type `__m512` with indeterminate elements. @@ -32113,8 +33767,8 @@ pub unsafe fn _mm512_undefined_pd() -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm512_undefined_ps() -> __m512 { - const { mem::zeroed() } +pub fn _mm512_undefined_ps() -> __m512 { + unsafe { const { mem::zeroed() } } } /// Return vector of type __m512i with indeterminate elements. @@ -32126,8 +33780,8 @@ pub unsafe fn _mm512_undefined_ps() -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm512_undefined_epi32() -> __m512i { - const { mem::zeroed() } +pub fn _mm512_undefined_epi32() -> __m512i { + unsafe { const { mem::zeroed() } } } /// Return vector of type __m512 with indeterminate elements. @@ -32139,8 +33793,8 @@ pub unsafe fn _mm512_undefined_epi32() -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. -pub unsafe fn _mm512_undefined() -> __m512 { - const { mem::zeroed() } +pub fn _mm512_undefined() -> __m512 { + unsafe { const { mem::zeroed() } } } /// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. 
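Note on the `_mm512_undefined*` functions above: the documentation still calls the elements indeterminate, but the bodies now evaluate `const { mem::zeroed() }`, i.e. a compile-time all-zero bit pattern (+0.0 per f64 lane), so callers get a defined value even though they should not rely on it. A minimal illustration of what `mem::zeroed` yields for such a lane layout (plain Rust, no intrinsics):

fn main() {
    // All-zero bits is a valid [f64; 8]; every lane reads back as +0.0.
    let zeroed: [f64; 8] = unsafe { core::mem::zeroed() };
    assert!(zeroed.iter().all(|&x| x == 0.0));
}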
mem_addr does not need to be aligned on any particular boundary. @@ -34384,7 +36038,7 @@ pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_setr_pd( +pub fn _mm512_setr_pd( e0: f64, e1: f64, e2: f64, @@ -34394,8 +36048,10 @@ pub unsafe fn _mm512_setr_pd( e6: f64, e7: f64, ) -> __m512d { - let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); - transmute(r) + unsafe { + let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); + transmute(r) + } } /// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values. @@ -34404,7 +36060,7 @@ pub unsafe fn _mm512_setr_pd( #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm512_set_pd( +pub fn _mm512_set_pd( e0: f64, e1: f64, e2: f64, @@ -34424,13 +36080,15 @@ pub unsafe fn _mm512_set_pd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovss))] -pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract!(src, 0); - let mut mov: f32 = extractsrc; - if (k & 0b00000001) != 0 { - mov = simd_extract!(b, 0); +pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut mov: f32 = extractsrc; + if (k & 0b00000001) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) } - simd_insert!(a, 0, mov) } /// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34440,12 +36098,14 @@ pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovss))] -pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mut mov: f32 = 0.; - if (k & 0b00000001) != 0 { - mov = simd_extract!(b, 0); +pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut mov: f32 = 0.; + if (k & 0b00000001) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) } - simd_insert!(a, 0, mov) } /// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34455,13 +36115,15 @@ pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsd))] -pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract!(src, 0); - let mut mov: f64 = extractsrc; - if (k & 0b00000001) != 0 { - mov = simd_extract!(b, 0); +pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut mov: f64 = extractsrc; + if (k & 0b00000001) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) } - simd_insert!(a, 0, mov) } /// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34471,12 +36133,14 @@ pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovsd))] -pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mut mov: f64 = 0.; - if (k & 0b00000001) != 0 { - mov = simd_extract!(b, 0); +pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut mov: f64 = 0.; + if (k & 0b00000001) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) } - simd_insert!(a, 0, mov) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34486,15 +36150,17 @@ pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddss))] -pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract!(src, 0); - let mut add: f32 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta + extractb; +pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -34504,14 +36170,16 @@ pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddss))] -pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mut add: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta + extractb; +pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34521,15 +36189,17 @@ pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddsd))] -pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract!(src, 0); - let mut add: f64 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta + extractb; +pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34539,14 +36209,16 @@ pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddsd))] -pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mut add: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta + extractb; +pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta + extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
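`_mm_mask_add_ss`/`_mm_maskz_add_ss` above, the move variants before them, and the sub/mul variants that follow all share one shape: only bit 0 of `k` matters, the operation touches lane 0 only, and the upper lanes are copied from `a`; when the bit is clear the writemask form falls back to `src` and the zeromask form to 0.0. A generic scalar sketch of that merge (helper names and the `op` parameter are mine):

// Generic model of the masked scalar (lowest-lane) operations: `op` stands
// in for add/sub/mul on lane 0, and the upper lanes always come from `a`.
fn mask_scalar_op_model(
    src: [f32; 4],
    k: u8,
    a: [f32; 4],
    b: [f32; 4],
    op: impl Fn(f32, f32) -> f32,
) -> [f32; 4] {
    let lane0 = if k & 1 != 0 { op(a[0], b[0]) } else { src[0] };
    [lane0, a[1], a[2], a[3]]
}

fn maskz_scalar_op_model(
    k: u8,
    a: [f32; 4],
    b: [f32; 4],
    op: impl Fn(f32, f32) -> f32,
) -> [f32; 4] {
    let lane0 = if k & 1 != 0 { op(a[0], b[0]) } else { 0.0 };
    [lane0, a[1], a[2], a[3]]
}

fn main() {
    let src = [9.0, 9.0, 9.0, 9.0];
    let a = [1.0, 10.0, 20.0, 30.0];
    let b = [2.0, 0.0, 0.0, 0.0];
    // Mask bit 0 set: lane 0 is a[0] + b[0]; upper lanes come from a.
    assert_eq!(
        mask_scalar_op_model(src, 1, a, b, |x, y| x + y),
        [3.0, 10.0, 20.0, 30.0]
    );
    // Mask bit 0 clear: the writemask keeps src's lane 0, the zeromask zeroes it.
    assert_eq!(
        mask_scalar_op_model(src, 0, a, b, |x, y| x + y),
        [9.0, 10.0, 20.0, 30.0]
    );
    assert_eq!(
        maskz_scalar_op_model(0, a, b, |x, y| x + y),
        [0.0, 10.0, 20.0, 30.0]
    );
}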
@@ -34556,15 +36228,17 @@ pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubss))] -pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract!(src, 0); - let mut add: f32 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta - extractb; +pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34574,14 +36248,16 @@ pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubss))] -pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mut add: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta - extractb; +pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34591,15 +36267,17 @@ pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubsd))] -pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract!(src, 0); - let mut add: f64 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta - extractb; +pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34609,14 +36287,16 @@ pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubsd))] -pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mut add: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta - extractb; +pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta - extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -34626,15 +36306,17 @@ pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulss))] -pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract!(src, 0); - let mut add: f32 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta * extractb; +pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34644,14 +36326,16 @@ pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulss))] -pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mut add: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta * extractb; +pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34661,15 +36345,17 @@ pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulsd))] -pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract!(src, 0); - let mut add: f64 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta * extractb; +pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34679,14 +36365,16 @@ pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulsd))] -pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mut add: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta * extractb; +pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta * extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34696,15 +36384,17 @@ pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivss))] -pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let extractsrc: f32 = simd_extract!(src, 0); - let mut add: f32 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta / extractb; +pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let extractsrc: f32 = simd_extract!(src, 0); + let mut add: f32 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -34714,14 +36404,16 @@ pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivss))] -pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - let mut add: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - add = extracta / extractb; +pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let mut add: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34731,15 +36423,17 @@ pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivsd))] -pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let extractsrc: f64 = simd_extract!(src, 0); - let mut add: f64 = extractsrc; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta / extractb; +pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let extractsrc: f64 = simd_extract!(src, 0); + let mut add: f64 = extractsrc; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34749,14 +36443,16 @@ pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivsd))] -pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - let mut add: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - add = extracta / extractb; +pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + let mut add: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + add = extracta / extractb; + } + simd_insert!(a, 0, add) } - simd_insert!(a, 0, add) } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
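The sub, mul and div hunks above follow exactly the same skeleton as the add case; only the arithmetic applied to lane 0 changes (which is also why the intermediate variable keeps the name `add` throughout). One way to see the shared shape, again as a standalone model rather than patch code:

fn mask_scalar_op_model(
    src: [f64; 2],
    k: u8,
    a: [f64; 2],
    b: [f64; 2],
    op: impl Fn(f64, f64) -> f64, // e.g. |x, y| x - y, |x, y| x * y, |x, y| x / y
) -> [f64; 2] {
    let mut dst = a; // upper lane copied from `a`
    dst[0] = if k & 1 != 0 { op(a[0], b[0]) } else { src[0] };
    dst
}

// For example:
// assert_eq!(
//     mask_scalar_op_model([9.0, 9.0], 0b1, [6.0, 1.0], [3.0, 2.0], |x, y| x / y),
//     [2.0, 1.0]
// );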
@@ -34766,14 +36462,16 @@ pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxss))] -pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vmaxss( - a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vmaxss( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34783,14 +36481,16 @@ pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxss))] -pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vmaxss( - a.as_f32x4(), - b.as_f32x4(), - f32x4::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vmaxss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34800,14 +36500,16 @@ pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxsd))] -pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vmaxsd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vmaxsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
@@ -34817,14 +36519,16 @@ pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxsd))] -pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vmaxsd( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vmaxsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34834,14 +36538,16 @@ pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminss))] -pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vminss( - a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vminss( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34851,14 +36557,16 @@ pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminss))] -pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vminss( - a.as_f32x4(), - b.as_f32x4(), - f32x4::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vminss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
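Unlike the arithmetic wrappers, the max/min hunks delegate to the vmaxss/vmaxsd/vminss/vminsd intrinsics with _MM_FROUND_CUR_DIRECTION, so the selection happens in hardware; only the masking is modelled here. One caveat, which is an observation about the x86 instructions rather than anything stated in this patch: MAXSS/MINSS behave like "a > b ? a : b", so NaN operands and the +0.0/-0.0 case yield the second operand instead of following Rust's f32::max/f32::min. A rough sketch:

fn mask_max_ss_model(src: [f32; 4], k: u8, a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    // VMAXSS-style select: unordered (NaN) and equal/signed-zero comparisons
    // fall through to the second operand `b`.
    let max = if a[0] > b[0] { a[0] } else { b[0] };
    let mut dst = a;
    dst[0] = if k & 1 != 0 { max } else { src[0] };
    dst
}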
@@ -34868,14 +36576,16 @@ pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminsd))] -pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vminsd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vminsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34885,14 +36595,16 @@ pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminsd))] -pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vminsd( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vminsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34902,8 +36614,8 @@ pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtss))] -pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -34913,8 +36625,8 @@ pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtss))] -pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) } } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
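For the square-root wrappers the computed value comes solely from the lower element of b, while a only supplies the upper lanes; a plain model of _mm_mask_sqrt_ss as documented above:

fn mask_sqrt_ss_model(src: [f32; 4], k: u8, a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let mut dst = a; // upper three lanes from `a`
    dst[0] = if k & 1 != 0 { b[0].sqrt() } else { src[0] };
    dst
}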
@@ -34924,8 +36636,8 @@ pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtsd))] -pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -34935,8 +36647,8 @@ pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtsd))] -pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) } } /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -34946,8 +36658,8 @@ pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ss))] -pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 { - transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) +pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) } } /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -34957,8 +36669,8 @@ pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ss))] -pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) } } /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. 
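The vrsqrt14*/vrcp14* wrappers here and in the following hunks are approximations with a documented relative error below 2^-14. A hedged sketch of how user code could exercise that bound once this patch lands: it assumes a nightly toolchain with #![feature(stdarch_x86_avx512)] at the crate root, an x86_64 target, a compiler that accepts safe #[target_feature] functions, and a caller that has already confirmed AVX-512F support (e.g. via is_x86_feature_detected!("avx512f")):

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn rsqrt14_error_is_small(x: f32) {
    use core::arch::x86_64::{_mm_cvtss_f32, _mm_rsqrt14_ss, _mm_set_ss};
    // `x` is assumed finite and strictly positive.
    let v = _mm_set_ss(x);
    let approx = _mm_cvtss_f32(_mm_rsqrt14_ss(v, v));
    let exact = 1.0 / x.sqrt();
    assert!(((approx - exact) / exact).abs() <= 2.0_f32.powi(-14));
}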
@@ -34968,8 +36680,8 @@ pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ss))] -pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } } /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -34979,8 +36691,8 @@ pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14sd))] -pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d { - transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) +pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) } } /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -34990,8 +36702,8 @@ pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14sd))] -pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) } } /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35001,8 +36713,8 @@ pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m1 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14sd))] -pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } } /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
The maximum relative error for this approximation is less than 2^-14. @@ -35012,8 +36724,8 @@ pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ss))] -pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 { - transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) +pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) } } /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35023,8 +36735,8 @@ pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ss))] -pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) +pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) } } /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35034,8 +36746,8 @@ pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ss))] -pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) +pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } } /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35045,8 +36757,8 @@ pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14sd))] -pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d { - transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) +pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) } } /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
The maximum relative error for this approximation is less than 2^-14. @@ -35056,8 +36768,8 @@ pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14sd))] -pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) +pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) } } /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35067,8 +36779,8 @@ pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14sd))] -pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) +pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. @@ -35078,14 +36790,16 @@ pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpss))] -pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 { - transmute(vgetexpss( - a.as_f32x4(), - b.as_f32x4(), - f32x4::ZERO, - 0b1, - _MM_FROUND_NO_EXC, - )) +pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vgetexpss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + 0b1, + _MM_FROUND_NO_EXC, + )) + } } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. 
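As the doc comments say, getexp essentially returns floor(log2(x)) of the lower element of b as a floating-point value, with the upper lanes copied from a. A standalone model of the unmasked _mm_getexp_ss for normal, non-zero inputs (the hardware additionally defines special encodings for zero, infinity and NaN, which this sketch ignores):

fn getexp_ss_model(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let mut dst = a;
    dst[0] = b[0].abs().log2().floor(); // floor(log2(|b0|))
    dst
}

// getexp_ss_model([0.0; 4], [10.0, 0.0, 0.0, 0.0])[0] == 3.0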
@@ -35095,14 +36809,16 @@ pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpss))] -pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vgetexpss( - a.as_f32x4(), - b.as_f32x4(), - src.as_f32x4(), - k, - _MM_FROUND_NO_EXC, - )) +pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vgetexpss( + a.as_f32x4(), + b.as_f32x4(), + src.as_f32x4(), + k, + _MM_FROUND_NO_EXC, + )) + } } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. @@ -35112,14 +36828,16 @@ pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpss))] -pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vgetexpss( - a.as_f32x4(), - b.as_f32x4(), - f32x4::ZERO, - k, - _MM_FROUND_NO_EXC, - )) +pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vgetexpss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_NO_EXC, + )) + } } /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. @@ -35129,14 +36847,16 @@ pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpsd))] -pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d { - transmute(vgetexpsd( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - 0b1, - _MM_FROUND_NO_EXC, - )) +pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vgetexpsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + 0b1, + _MM_FROUND_NO_EXC, + )) + } } /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. 
@@ -35146,14 +36866,16 @@ pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpsd))] -pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vgetexpsd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - _MM_FROUND_NO_EXC, - )) +pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vgetexpsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_NO_EXC, + )) + } } /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. @@ -35163,14 +36885,16 @@ pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpsd))] -pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vgetexpsd( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - k, - _MM_FROUND_NO_EXC, - )) +pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vgetexpsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_NO_EXC, + )) + } } /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -35191,26 +36915,25 @@ pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_getmant_ss< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm_getmant_ss( a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vgetmantss( - a, - b, - SIGN << 2 | NORM, - f32x4::ZERO, - 0b1, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss( + a, + b, + SIGN << 2 | NORM, + f32x4::ZERO, + 0b1, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -35231,7 +36954,7 @@ pub unsafe fn _mm_getmant_ss< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_mask_getmant_ss< +pub fn _mm_mask_getmant_ss< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -35240,13 +36963,15 @@ pub unsafe fn _mm_mask_getmant_ss< a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -35267,7 +36992,7 @@ pub unsafe fn _mm_mask_getmant_ss< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_maskz_getmant_ss< +pub fn _mm_maskz_getmant_ss< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -35275,19 +37000,21 @@ pub unsafe fn _mm_maskz_getmant_ss< a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vgetmantss( - a, - b, - SIGN << 2 | NORM, - f32x4::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss( + a, + b, + SIGN << 2 | NORM, + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -35308,26 +37035,25 @@ pub unsafe fn _mm_maskz_getmant_ss< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_getmant_sd< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm_getmant_sd( a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vgetmantsd( - a, - b, - SIGN << 2 | NORM, - f64x2::ZERO, - 0b1, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd( + a, + b, + SIGN << 2 | NORM, + f64x2::ZERO, + 0b1, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -35348,7 +37074,7 @@ pub unsafe fn _mm_getmant_sd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_mask_getmant_sd< +pub fn _mm_mask_getmant_sd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -35357,13 +37083,15 @@ pub unsafe fn _mm_mask_getmant_sd< a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -35384,7 +37112,7 @@ pub unsafe fn _mm_mask_getmant_sd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_maskz_getmant_sd< +pub fn _mm_maskz_getmant_sd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -35392,19 +37120,21 @@ pub unsafe fn _mm_maskz_getmant_sd< a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vgetmantsd( - a, - b, - SIGN << 2 | NORM, - f64x2::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd( + a, + b, + SIGN << 2 | NORM, + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -35421,19 +37151,21 @@ pub unsafe fn _mm_maskz_getmant_sd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vrndscaless( - a, - b, - f32x4::ZERO, - 0b11111111, - IMM8, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) +pub fn _mm_roundscale_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless( + a, + b, + f32x4::ZERO, + 0b11111111, + IMM8, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -35450,18 +37182,20 @@ pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_roundscale_ss( +pub fn _mm_mask_roundscale_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper 
elements of dst.\ @@ -35478,16 +37212,14 @@ pub unsafe fn _mm_mask_roundscale_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_roundscale_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm_maskz_roundscale_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -35504,19 +37236,21 @@ pub unsafe fn _mm_maskz_roundscale_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vrndscalesd( - a, - b, - f64x2::ZERO, - 0b11111111, - IMM8, - _MM_FROUND_CUR_DIRECTION, - ); - transmute(r) +pub fn _mm_roundscale_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd( + a, + b, + f64x2::ZERO, + 0b11111111, + IMM8, + _MM_FROUND_CUR_DIRECTION, + ); + transmute(r) + } } /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -35533,18 +37267,20 @@ pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d) -> __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_roundscale_sd( +pub fn _mm_mask_roundscale_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -35561,16 +37297,14 @@ pub unsafe fn _mm_mask_roundscale_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_roundscale_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = 
a.as_f64x2(); - let b = b.as_f64x2(); - let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); - transmute(r) +pub fn _mm_maskz_roundscale_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35580,16 +37314,18 @@ pub unsafe fn _mm_maskz_roundscale_sd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefss))] -pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 { - let a = a.as_f32x4(); - let b = b.as_f32x4(); - transmute(vscalefss( - a, - b, - f32x4::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + let a = a.as_f32x4(); + let b = b.as_f32x4(); + transmute(vscalefss( + a, + b, + f32x4::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35599,11 +37335,13 @@ pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefss))] -pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)) +pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35613,14 +37351,16 @@ pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefss))] -pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vscalefss( - a.as_f32x4(), - b.as_f32x4(), - f32x4::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + transmute(vscalefss( + a.as_f32x4(), + b.as_f32x4(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
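In the vgetmantss/vgetmantsd hunks above, the safe signatures keep the same <const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM> parameters and only the bodies gain the unsafe block; the two const generics are folded into one immediate as SIGN << 2 | NORM before reaching the intrinsic, while the vrndscaless/vrndscalesd hunks pass IMM8 through unchanged. A small sketch of the getmant packing; the _MM_MANT_* names in the comment are the usual C-style enum constants and are mentioned only for orientation:

// Mirrors the `SIGN << 2 | NORM` construction used by the wrappers above;
// the bounds match the static_assert_uimm_bits! checks.
fn getmant_imm(norm: i32, sign: i32) -> i32 {
    assert!((0..16).contains(&norm) && (0..4).contains(&sign));
    sign << 2 | norm
}

// With NORM = 0 (_MM_MANT_NORM_1_2, mantissa normalized into [1, 2)) and
// SIGN = 0 (_MM_MANT_SIGN_src), the immediate is 0, which is also the default
// that the assert_instr(vgetmantss, NORM = 0, SIGN = 0) attribute checks against.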
@@ -35630,14 +37370,16 @@ pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefsd))] -pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefsd( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - 0b11111111, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35647,14 +37389,16 @@ pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefsd))] -pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefsd( - a.as_f64x2(), - b.as_f64x2(), - src.as_f64x2(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefsd( + a.as_f64x2(), + b.as_f64x2(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35664,14 +37408,16 @@ pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m12 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefsd))] -pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefsd( - a.as_f64x2(), - b.as_f64x2(), - f64x2::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + transmute(vscalefsd( + a.as_f64x2(), + b.as_f64x2(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
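The scalef family computes, in the lower lane, a0 * 2^floor(b0) (a description taken from the VSCALEF instruction definition rather than from the doc text above), with the upper lanes again coming from a; a standalone sketch:

fn scalef_ss_model(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let mut dst = a;
    dst[0] = a[0] * b[0].floor().exp2(); // a0 * 2^floor(b0)
    dst
}

// scalef_ss_model([3.0, 0.0, 0.0, 0.0], [2.5, 0.0, 0.0, 0.0])[0] == 12.0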
@@ -35681,14 +37427,16 @@ pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] -pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fmadd: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fmadd = fmaf32(fmadd, extractb, extractc); +pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = fmaf32(fmadd, extractb, extractc); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35698,15 +37446,17 @@ pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] -pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - let mut fmadd: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fmadd = fmaf32(extracta, extractb, extractc); +pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
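The converted _mm_mask_fmadd_ss / _mm_maskz_fmadd_ss bodies above keep the existing software fallback: extract lane 0, conditionally apply one fused multiply-add, and insert the result back. Restated in plain Rust for illustration, with f32::mul_add standing in for the internal fmaf32 and arrays standing in for __m128 (names are made up):

// Editor's sketch: what the _mm_mask_fmadd_ss fallback above computes.
fn mask_fmadd_ss_model(a: [f32; 4], k: u8, b: [f32; 4], c: [f32; 4]) -> [f32; 4] {
    let mut low = a[0];                 // mask bit clear: keep a's low lane
    if k & 1 != 0 {
        low = a[0].mul_add(b[0], c[0]); // fused a*b + c on lane 0
    }
    let mut r = a;                      // upper three lanes always come from a
    r[0] = low;
    r
}

fn main() {
    let (a, b, c) = ([2.0, 9.0, 9.0, 9.0], [3.0; 4], [1.0; 4]);
    assert_eq!(mask_fmadd_ss_model(a, 0b1, b, c)[0], 7.0); // 2*3 + 1
    assert_eq!(mask_fmadd_ss_model(a, 0b0, b, c)[0], 2.0); // low lane kept from a
}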
@@ -35716,14 +37466,16 @@ pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] -pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fmadd: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - fmadd = fmaf32(extracta, extractb, fmadd); +pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + fmadd = fmaf32(extracta, extractb, fmadd); + } + simd_insert!(c, 0, fmadd) } - simd_insert!(c, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35733,14 +37485,16 @@ pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] -pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fmadd: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fmadd = fmaf64(fmadd, extractb, extractc); +pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = fmaf64(fmadd, extractb, extractc); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
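Three masking flavours recur through these scalar FMA hunks, differing only in what feeds the low lane when bit 0 of k is clear and in which operand supplies the upper lanes. A compact illustrative summary in plain Rust (not the crate's code; array types and names are stand-ins, parameter order mirrors the intrinsics):

// Editor's sketch of the three masking conventions (k bit 0 clear case in comments).
fn fma_lane0(a: [f32; 4], b: [f32; 4], c: [f32; 4]) -> f32 {
    a[0].mul_add(b[0], c[0])
}

// _mm_mask_*_ss:  low lane falls back to a[0], upper lanes come from a
fn mask_model(a: [f32; 4], k: u8, b: [f32; 4], c: [f32; 4]) -> [f32; 4] {
    let mut r = a;
    if k & 1 != 0 { r[0] = fma_lane0(a, b, c); }
    r
}

// _mm_maskz_*_ss: low lane falls back to 0.0, upper lanes come from a
fn maskz_model(k: u8, a: [f32; 4], b: [f32; 4], c: [f32; 4]) -> [f32; 4] {
    let mut r = a;
    r[0] = if k & 1 != 0 { fma_lane0(a, b, c) } else { 0.0 };
    r
}

// _mm_mask3_*_ss: low lane falls back to c[0], upper lanes come from c
fn mask3_model(a: [f32; 4], b: [f32; 4], c: [f32; 4], k: u8) -> [f32; 4] {
    let mut r = c;
    if k & 1 != 0 { r[0] = fma_lane0(a, b, c); }
    r
}

fn main() {
    let (a, b, c) = ([2.0; 4], [3.0; 4], [1.0; 4]);
    assert_eq!(mask_model(a, 0, b, c)[0], 2.0);
    assert_eq!(maskz_model(0, a, b, c)[0], 0.0);
    assert_eq!(mask3_model(a, b, c, 0)[0], 1.0);
}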
@@ -35750,15 +37504,17 @@ pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] -pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let mut fmadd: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fmadd = fmaf64(extracta, extractb, extractc); +pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. @@ -35768,14 +37524,16 @@ pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd))] -pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fmadd: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - fmadd = fmaf64(extracta, extractb, fmadd); +pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + fmadd = fmaf64(extracta, extractb, fmadd); + } + simd_insert!(c, 0, fmadd) } - simd_insert!(c, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -35785,15 +37543,17 @@ pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] -pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fmsub: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = fmaf32(fmsub, extractb, extractc); +pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf32(fmsub, extractb, extractc); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35803,16 +37563,18 @@ pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] -pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - let mut fmsub: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = fmaf32(extracta, extractb, extractc); +pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
@@ -35822,15 +37584,17 @@ pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] -pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fmsub: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc = -fmsub; - fmsub = fmaf32(extracta, extractb, extractc); +pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(c, 0, fmsub) } - simd_insert!(c, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35840,15 +37604,17 @@ pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] -pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fmsub: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = fmaf64(fmsub, extractb, extractc); +pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf64(fmsub, extractb, extractc); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
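As the fmsub fallbacks above show, fmsub is the same fused primitive with the addend negated before the call, so the low lane is a*b - c computed in one rounding. A one-line illustrative model using f32::mul_add:

// Editor's sketch: fmsub on the low lane is fma with a negated addend.
fn fmsub_lane(a: f32, b: f32, c: f32) -> f32 {
    a.mul_add(b, -c)
}

fn main() {
    assert_eq!(fmsub_lane(2.0, 3.0, 1.0), 5.0); // 2*3 - 1
}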
@@ -35858,16 +37624,18 @@ pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] -pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let mut fmsub: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = fmaf64(extracta, extractb, extractc); +pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fmsub: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. @@ -35877,15 +37645,17 @@ pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub))] -pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fmsub: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc = -fmsub; - fmsub = fmaf64(extracta, extractb, extractc); +pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fmsub: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(c, 0, fmsub) } - simd_insert!(c, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -35895,15 +37665,17 @@ pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] -pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fnmadd: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmadd; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fnmadd = fmaf32(extracta, extractb, extractc); +pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmadd; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fnmadd = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -35913,16 +37685,18 @@ pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] -pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - let mut fnmadd: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fnmadd = fmaf32(extracta, extractb, extractc); +pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fnmadd = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
@@ -35932,15 +37706,17 @@ pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] -pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fnmadd: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - fnmadd = fmaf32(extracta, extractb, fnmadd); +pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fnmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + fnmadd = fmaf32(extracta, extractb, fnmadd); + } + simd_insert!(c, 0, fnmadd) } - simd_insert!(c, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -35950,15 +37726,17 @@ pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] -pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fnmadd: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmadd; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fnmadd = fmaf64(extracta, extractb, extractc); +pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmadd; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fnmadd = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
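The fnmadd fallbacks above negate the multiplicand rather than the addend, giving c - a*b on the low lane, still with a single rounding. Illustrative model:

// Editor's sketch: fnmadd on the low lane negates the product, not the addend.
fn fnmadd_lane(a: f32, b: f32, c: f32) -> f32 {
    (-a).mul_add(b, c) // == c - a*b, one rounding
}

fn main() {
    assert_eq!(fnmadd_lane(2.0, 3.0, 1.0), -5.0); // 1 - 2*3
}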
@@ -35968,16 +37746,18 @@ pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] -pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let mut fnmadd: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fnmadd = fmaf64(extracta, extractb, extractc); +pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fnmadd = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. @@ -35987,15 +37767,17 @@ pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd))] -pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fnmadd: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - fnmadd = fmaf64(extracta, extractb, fnmadd); +pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fnmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + fnmadd = fmaf64(extracta, extractb, fnmadd); + } + simd_insert!(c, 0, fnmadd) } - simd_insert!(c, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
@@ -36005,16 +37787,18 @@ pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] -pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { - let mut fnmsub: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmsub; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = fmaf32(extracta, extractb, extractc); +pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmsub; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -36024,17 +37808,19 @@ pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) - #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] -pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { - let mut fnmsub: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = fmaf32(extracta, extractb, extractc); +pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let mut fnmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst. 
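fnmsub combines both negations seen in the hunks above: the product and the addend are negated, so the low lane is -(a*b) - c. Illustrative model:

// Editor's sketch: fnmsub on the low lane is -(a*b) - c with a single rounding.
fn fnmsub_lane(a: f32, b: f32, c: f32) -> f32 {
    (-a).mul_add(b, -c)
}

fn main() {
    assert_eq!(fnmsub_lane(2.0, 3.0, 1.0), -7.0); // -(2*3) - 1
}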
@@ -36044,16 +37830,18 @@ pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] -pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { - let mut fnmsub: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc = -fnmsub; - fnmsub = fmaf32(extracta, extractb, extractc); +pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 { + unsafe { + let mut fnmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fnmsub; + fnmsub = fmaf32(extracta, extractb, extractc); + } + simd_insert!(c, 0, fnmsub) } - simd_insert!(c, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -36063,16 +37851,18 @@ pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] -pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { - let mut fnmsub: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmsub; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = fmaf64(extracta, extractb, extractc); +pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmsub; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
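The hunks that follow convert the *_round_ss / *_round_sd arithmetic wrappers, whose ROUNDING const generic is restricted by static_assert_rounding! to the _MM_FROUND_* combinations listed in their doc comments (the max/min wrappers further down take an SAE parameter checked by static_assert_sae! instead). As a rough reference for callers, a sketch of the accepted values; the numeric encodings shown are the conventional ones and are an assumption here, not taken from this patch:

// Editor's sketch: the ROUNDING combinations the *_round_* wrappers accept,
// assuming the standard _MM_FROUND_* encoding (TO_NEAREST_INT = 0,
// TO_NEG_INF = 1, TO_POS_INF = 2, TO_ZERO = 3, CUR_DIRECTION = 4, NO_EXC = 8).
const TO_NEAREST_INT: i32 = 0;
const TO_NEG_INF: i32 = 1;
const TO_POS_INF: i32 = 2;
const TO_ZERO: i32 = 3;
const CUR_DIRECTION: i32 = 4;
const NO_EXC: i32 = 8;

fn main() {
    // TO_NEAREST_INT | NO_EXC evaluates to 8, which is why the assert_instr
    // attributes in these hunks pin ROUNDING = 8.
    for r in [
        TO_NEAREST_INT | NO_EXC,
        TO_NEG_INF | NO_EXC,
        TO_POS_INF | NO_EXC,
        TO_ZERO | NO_EXC,
        CUR_DIRECTION,
    ] {
        println!("accepted ROUNDING value: {r}");
    }
}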
@@ -36082,17 +37872,19 @@ pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] -pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let mut fnmsub: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = fmaf64(extracta, extractb, extractc); +pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let mut fnmsub: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst. @@ -36102,16 +37894,18 @@ pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128 #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub))] -pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { - let mut fnmsub: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc = -fnmsub; - fnmsub = fmaf64(extracta, extractb, extractc); +pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d { + unsafe { + let mut fnmsub: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc = -fnmsub; + fnmsub = fmaf64(extracta, extractb, extractc); + } + simd_insert!(c, 0, fnmsub) } - simd_insert!(c, 0, fnmsub) } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36129,12 +37923,14 @@ pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_add_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src 
when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36152,18 +37948,20 @@ pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_add_round_ss( +pub fn _mm_mask_add_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vaddss(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vaddss(a, b, src, k, ROUNDING); + transmute(r) + } } /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36181,16 +37979,14 @@ pub unsafe fn _mm_mask_add_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_add_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -36208,12 +38004,14 @@ pub unsafe fn _mm_maskz_add_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_add_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36231,18 +38029,20 @@ pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_add_round_sd( +pub fn _mm_mask_add_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vaddsd(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = 
b.as_f64x2(); + let src = src.as_f64x2(); + let r = vaddsd(a, b, src, k, ROUNDING); + transmute(r) + } } /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36260,16 +38060,14 @@ pub unsafe fn _mm_mask_add_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_add_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_add_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36287,12 +38085,14 @@ pub unsafe fn _mm_maskz_add_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_sub_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36310,18 +38110,20 @@ pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_sub_round_ss( +pub fn _mm_mask_sub_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vsubss(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vsubss(a, b, src, k, ROUNDING); + transmute(r) + } } /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36339,16 +38141,14 @@ pub unsafe fn _mm_mask_sub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, 
assert_instr(vsubss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_sub_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -36366,12 +38166,14 @@ pub unsafe fn _mm_maskz_sub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36389,18 +38191,20 @@ pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_sub_round_sd( +pub fn _mm_mask_sub_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vsubsd(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vsubsd(a, b, src, k, ROUNDING); + transmute(r) + } } /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36418,16 +38222,14 @@ pub unsafe fn _mm_mask_sub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_sub_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_sub_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vsubsd(a, b, f64x2::ZERO, k, 
ROUNDING); + transmute(r) + } } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36445,12 +38247,14 @@ pub unsafe fn _mm_maskz_sub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36468,18 +38272,20 @@ pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_mul_round_ss( +pub fn _mm_mask_mul_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vmulss(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vmulss(a, b, src, k, ROUNDING); + transmute(r) + } } /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36497,16 +38303,14 @@ pub unsafe fn _mm_mask_mul_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_mul_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -36524,12 +38328,14 @@ pub unsafe fn _mm_maskz_mul_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_mul_round_sd(a: __m128d, b: 
__m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36547,18 +38353,20 @@ pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_mul_round_sd( +pub fn _mm_mask_mul_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vmulsd(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vmulsd(a, b, src, k, ROUNDING); + transmute(r) + } } /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36576,16 +38384,14 @@ pub unsafe fn _mm_mask_mul_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_mul_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_mul_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36603,12 +38409,14 @@ pub unsafe fn _mm_maskz_mul_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_div_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36626,18 +38434,20 @@ pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128) -> __m #[unstable(feature = 
"stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_div_round_ss( +pub fn _mm_mask_div_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vdivss(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vdivss(a, b, src, k, ROUNDING); + transmute(r) + } } /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36655,16 +38465,14 @@ pub unsafe fn _mm_mask_div_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_div_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -36682,12 +38490,14 @@ pub unsafe fn _mm_maskz_div_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING); - transmute(r) +pub fn _mm_div_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING); + transmute(r) + } } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36705,18 +38515,20 @@ pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_div_round_sd( +pub fn _mm_mask_div_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vdivsd(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = 
src.as_f64x2(); + let r = vdivsd(a, b, src, k, ROUNDING); + transmute(r) + } } /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36734,16 +38546,14 @@ pub unsafe fn _mm_mask_div_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_div_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_div_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36755,12 +38565,14 @@ pub unsafe fn _mm_maskz_div_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE); - transmute(r) +pub fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE); + transmute(r) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36772,18 +38584,20 @@ pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_max_round_ss( +pub fn _mm_mask_max_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vmaxss(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vmaxss(a, b, src, k, SAE); + transmute(r) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36795,12 +38609,14 @@ pub unsafe fn _mm_mask_max_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxss, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - static_assert_sae!(SAE); - let a = 
a.as_f32x4(); - let b = b.as_f32x4(); - let r = vmaxss(a, b, f32x4::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vmaxss(a, b, f32x4::ZERO, k, SAE); + transmute(r) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -36812,12 +38628,14 @@ pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE); - transmute(r) +pub fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE); + transmute(r) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36829,18 +38647,20 @@ pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_max_round_sd( +pub fn _mm_mask_max_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vmaxsd(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vmaxsd(a, b, src, k, SAE); + transmute(r) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36852,16 +38672,14 @@ pub unsafe fn _mm_mask_max_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_max_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vmaxsd(a, b, f64x2::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vmaxsd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36873,12 +38691,14 @@ pub unsafe fn _mm_maskz_max_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminss, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn 
_mm_min_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vminss(a, b, f32x4::ZERO, 0b1, SAE); - transmute(r) +pub fn _mm_min_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vminss(a, b, f32x4::ZERO, 0b1, SAE); + transmute(r) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36890,18 +38710,20 @@ pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminss, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_min_round_ss( +pub fn _mm_mask_min_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vminss(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vminss(a, b, src, k, SAE); + transmute(r) + } } /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36913,12 +38735,14 @@ pub unsafe fn _mm_mask_min_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminss, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vminss(a, b, f32x4::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vminss(a, b, f32x4::ZERO, k, SAE); + transmute(r) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\ @@ -36930,12 +38754,14 @@ pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminsd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE); - transmute(r) +pub fn _mm_min_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE); + transmute(r) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36947,18 +38773,20 @@ pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d) -> 
__m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminsd, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_min_round_sd( +pub fn _mm_mask_min_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vminsd(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vminsd(a, b, src, k, SAE); + transmute(r) + } } /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -36970,16 +38798,14 @@ pub unsafe fn _mm_mask_min_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vminsd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_min_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vminsd(a, b, f64x2::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vminsd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -36997,9 +38823,11 @@ pub unsafe fn _mm_maskz_min_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING) +pub fn _mm_sqrt_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING) + } } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37017,14 +38845,16 @@ pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_sqrt_round_ss( +pub fn _mm_mask_sqrt_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - vsqrtss(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtss(a, b, src, k, ROUNDING) + } } /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37042,13 +38872,11 @@ pub unsafe fn _mm_mask_sqrt_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] 
#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_sqrt_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_rounding!(ROUNDING); - vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING) +pub fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING) + } } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -37066,9 +38894,11 @@ pub unsafe fn _mm_maskz_sqrt_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_rounding!(ROUNDING); - vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING) +pub fn _mm_sqrt_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING) + } } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37086,14 +38916,16 @@ pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_sqrt_round_sd( +pub fn _mm_mask_sqrt_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - vsqrtsd(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsd(a, b, src, k, ROUNDING) + } } /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37111,13 +38943,15 @@ pub unsafe fn _mm_mask_sqrt_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_sqrt_round_sd( +pub fn _mm_maskz_sqrt_round_sd( k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING) + } } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
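All of the scalar `_round` forms above take their rounding mode or SAE behaviour as a const generic (the values checked by `static_assert_rounding!` and `static_assert_sae!`), and with this change they are safe `fn`s, so the remaining obligation is only that AVX-512F is actually enabled where they are called. A minimal illustrative sketch of one way to drive them, assuming a nightly toolchain (the intrinsics are still marked `#[unstable(feature = "stdarch_x86_avx512")]`) and runtime feature detection; the helper names are hypothetical:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// Hypothetical helper: with the intrinsics now safe, no `unsafe` block is
// needed inside a function that itself enables avx512f.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn round_demo(a: f64, b: f64) -> (f64, f32) {
    // Divide with an explicit rounding mode; ROUNDING must be one of the
    // combinations accepted by `static_assert_rounding!`.
    let q = _mm_div_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        _mm_set_sd(a),
        _mm_set_sd(b),
    );
    // Zero-masked minimum; SAE must satisfy `static_assert_sae!`, i.e.
    // _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.
    let m = _mm_maskz_min_round_ss::<{ _MM_FROUND_NO_EXC }>(
        0b1,
        _mm_set_ss(a as f32),
        _mm_set_ss(b as f32),
    );
    (_mm_cvtsd_f64(q), _mm_cvtss_f32(m))
}

#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: AVX-512F support was verified at runtime just above.
        let (q, m) = unsafe { round_demo(1.0, 3.0) };
        println!("1/3 = {q}, min = {m}");
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}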
This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -37129,12 +38963,14 @@ pub unsafe fn _mm_maskz_sqrt_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE); - transmute(r) +pub fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE); + transmute(r) + } } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -37146,18 +38982,20 @@ pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_getexp_round_ss( +pub fn _mm_mask_getexp_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vgetexpss(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetexpss(a, b, src, k, SAE); + transmute(r) + } } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -37169,16 +39007,14 @@ pub unsafe fn _mm_mask_getexp_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_getexp_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vgetexpss(a, b, f32x4::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetexpss(a, b, f32x4::ZERO, k, SAE); + transmute(r) + } } /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -37190,12 +39026,14 @@ pub unsafe fn _mm_maskz_getexp_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE); - transmute(r) +pub fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE); + transmute(r) + } } /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -37207,18 +39045,20 @@ pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_getexp_round_sd( +pub fn _mm_mask_getexp_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vgetexpsd(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetexpsd(a, b, src, k, SAE); + transmute(r) + } } /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\ @@ -37230,16 +39070,14 @@ pub unsafe fn _mm_mask_getexp_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_getexp_round_sd( - k: __mmask8, - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } } /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
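The getexp forms above essentially compute floor(log2(|x|)) of the low lane. A small illustrative sketch under the same assumptions as the previous one (nightly toolchain, AVX-512F detected before the hypothetical helper is called through `unsafe`):

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// floor(log2(|x|)) of the low lane via VGETEXPSS. Hypothetical helper, driven
// the same way as the earlier sketch.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn exponent_of(x: f32) -> f32 {
    let a = _mm_set_ss(0.0); // upper three lanes of the result come from `a`
    let b = _mm_set_ss(x);
    let e = _mm_getexp_round_ss::<{ _MM_FROUND_NO_EXC }>(a, b);
    _mm_cvtss_f32(e) // e.g. x = 10.0 yields 3.0
}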
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -37260,7 +39098,7 @@ pub unsafe fn _mm_maskz_getexp_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3, 4)] -pub unsafe fn _mm_getmant_round_ss< +pub fn _mm_getmant_round_ss< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -37268,13 +39106,15 @@ pub unsafe fn _mm_getmant_round_ss< a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE); + transmute(r) + } } /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -37295,7 +39135,7 @@ pub unsafe fn _mm_getmant_round_ss< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(4, 5, 6)] -pub unsafe fn _mm_mask_getmant_round_ss< +pub fn _mm_mask_getmant_round_ss< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -37305,14 +39145,16 @@ pub unsafe fn _mm_mask_getmant_round_ss< a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE); + transmute(r) + } } /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -37333,7 +39175,7 @@ pub unsafe fn _mm_mask_getmant_round_ss< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4, 5)] -pub unsafe fn _mm_maskz_getmant_round_ss< +pub fn _mm_maskz_getmant_round_ss< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -37342,13 +39184,15 @@ pub unsafe fn _mm_maskz_getmant_round_ss< a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE); + transmute(r) + } } /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -37369,7 +39213,7 @@ pub unsafe fn _mm_maskz_getmant_round_ss< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(2, 3, 4)] -pub unsafe fn _mm_getmant_round_sd< +pub fn _mm_getmant_round_sd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -37377,13 +39221,15 @@ pub unsafe fn _mm_getmant_round_sd< a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE); + transmute(r) + } } /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
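For the getmant family, the interval and sign behaviour come in as const generics of the `_MM_MANTISSA_*_ENUM` types, supplied via the crate's `_MM_MANT_*` constants. An illustrative sketch, with the same toolchain and feature-detection assumptions as above:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// Extract the normalized mantissa of the low lane via VGETMANTSD; SAE has to
// satisfy `static_assert_mantissas_sae!`. Hypothetical helper, driven like the
// earlier sketches.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn mantissa_of(x: f64) -> f64 {
    let r = _mm_getmant_round_sd::<
        { _MM_MANT_NORM_1_2 },        // normalize into [1, 2)
        { _MM_MANT_SIGN_SRC },        // keep the sign of the source
        { _MM_FROUND_CUR_DIRECTION },
    >(_mm_set_sd(0.0), _mm_set_sd(x));
    _mm_cvtsd_f64(r) // e.g. x = 12.0 yields 1.5, since 12 = 1.5 * 2^3
}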
This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -37404,7 +39250,7 @@ pub unsafe fn _mm_getmant_round_sd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(4, 5, 6)] -pub unsafe fn _mm_mask_getmant_round_sd< +pub fn _mm_mask_getmant_round_sd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -37414,14 +39260,16 @@ pub unsafe fn _mm_mask_getmant_round_sd< a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE); + transmute(r) + } } /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\ @@ -37442,7 +39290,7 @@ pub unsafe fn _mm_mask_getmant_round_sd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))] #[rustc_legacy_const_generics(3, 4, 5)] -pub unsafe fn _mm_maskz_getmant_round_sd< +pub fn _mm_maskz_getmant_round_sd< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -37451,13 +39299,15 @@ pub unsafe fn _mm_maskz_getmant_round_sd< a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE); + transmute(r) + } } /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37475,16 +39325,15 @@ pub unsafe fn _mm_maskz_getmant_round_sd< #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_roundscale_round_ss( - a: __m128, - b: __m128, -) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE); - transmute(r) +pub fn _mm_roundscale_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + 
static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE); + transmute(r) + } } /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37502,19 +39351,21 @@ pub unsafe fn _mm_roundscale_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_mask_roundscale_round_ss( +pub fn _mm_mask_roundscale_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vrndscaless(a, b, src, k, IMM8, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vrndscaless(a, b, src, k, IMM8, SAE); + transmute(r) + } } /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37532,17 +39383,19 @@ pub unsafe fn _mm_mask_roundscale_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_maskz_roundscale_round_ss( +pub fn _mm_maskz_roundscale_round_ss( k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE); + transmute(r) + } } /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -37560,16 +39413,15 @@ pub unsafe fn _mm_maskz_roundscale_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_roundscale_round_sd( - a: __m128d, - b: __m128d, -) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE); - transmute(r) +pub fn _mm_roundscale_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE); + transmute(r) + } } /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by 
imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37587,19 +39439,21 @@ pub unsafe fn _mm_roundscale_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_mask_roundscale_round_sd( +pub fn _mm_mask_roundscale_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vrndscalesd(a, b, src, k, IMM8, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vrndscalesd(a, b, src, k, IMM8, SAE); + transmute(r) + } } /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37617,17 +39471,19 @@ pub unsafe fn _mm_mask_roundscale_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_maskz_roundscale_round_sd( +pub fn _mm_maskz_roundscale_round_sd( k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37645,12 +39501,14 @@ pub unsafe fn _mm_maskz_roundscale_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn _mm_scalef_round_ss(a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37668,18 +39526,20 @@ pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe 
fn _mm_mask_scalef_round_ss( +pub fn _mm_mask_scalef_round_ss( src: __m128, k: __mmask8, a: __m128, b: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let src = src.as_f32x4(); - let r = vscalefss(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let src = src.as_f32x4(); + let r = vscalefss(a, b, src, k, ROUNDING); + transmute(r) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37697,16 +39557,14 @@ pub unsafe fn _mm_mask_scalef_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_scalef_round_ss( - k: __mmask8, - a: __m128, - b: __m128, -) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_scalef_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -37724,12 +39582,14 @@ pub unsafe fn _mm_maskz_scalef_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn _mm_scalef_round_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37747,17 +39607,19 @@ pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_scalef_round_sd( +pub fn _mm_mask_scalef_round_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let src = src.as_f64x2(); - let r = vscalefsd(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let src = src.as_f64x2(); + let r = vscalefsd(a, b, src, k, ROUNDING); + transmute(r) + } } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask 
bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37775,16 +39637,18 @@ pub unsafe fn _mm_mask_scalef_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_scalef_round_sd( +pub fn _mm_maskz_scalef_round_sd( k: __mmask8, a: __m128d, b: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING); + transmute(r) + } } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37802,13 +39666,15 @@ pub unsafe fn _mm_maskz_scalef_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let r = vfmaddssround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, r) +pub fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let r = vfmaddssround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, r) + } } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37826,20 +39692,22 @@ pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fmadd_round_ss( +pub fn _mm_mask_fmadd_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fmadd: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
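The roundscale and scalef forms shown above take, respectively, an 8-bit immediate plus an SAE value, and a rounding mode. An illustrative sketch under the same assumptions as before:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// VRNDSCALESS rounds the low lane to a fixed number of fraction bits (upper
// four bits of IMM8; the low bits pick the rounding behaviour, 0 = nearest),
// and VSCALEFSS computes a * 2^floor(b). Hypothetical helper, driven like the
// earlier sketches.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn roundscale_and_scalef(x: f32) -> (f32, f32) {
    let a = _mm_set_ss(x);
    // IMM8 = 0: keep zero fraction bits, round to nearest, so 2.7 becomes 3.0.
    let rounded = _mm_roundscale_round_ss::<0, { _MM_FROUND_NO_EXC }>(a, a);
    // x * 2^floor(4.0) = 16 * x, with round-to-nearest and exceptions suppressed.
    let scaled = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        a,
        _mm_set_ss(4.0),
    );
    (_mm_cvtss_f32(rounded), _mm_cvtss_f32(scaled))
}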
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -37857,21 +39725,23 @@ pub unsafe fn _mm_mask_fmadd_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fmadd_round_ss( +pub fn _mm_maskz_fmadd_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fmadd: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -37889,20 +39759,22 @@ pub unsafe fn _mm_maskz_fmadd_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fmadd_round_ss( +pub fn _mm_mask3_fmadd_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fmadd: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING); + } + simd_insert!(c, 0, fmadd) } - simd_insert!(c, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -37920,17 +39792,15 @@ pub unsafe fn _mm_mask3_fmadd_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fmadd_round_sd( - a: __m128d, - b: __m128d, - c: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fmadd) +pub fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fmadd) + } } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37948,20 +39818,22 @@ pub unsafe fn _mm_fmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fmadd_round_sd( +pub fn _mm_mask_fmadd_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fmadd: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -37979,21 +39851,23 @@ pub unsafe fn _mm_mask_fmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fmadd_round_sd( +pub fn _mm_maskz_fmadd_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fmadd: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38011,20 +39885,22 @@ pub unsafe fn _mm_maskz_fmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fmadd_round_sd( +pub fn _mm_mask3_fmadd_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fmadd: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING); + } + simd_insert!(c, 0, fmadd) } - simd_insert!(c, 0, fmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
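The fmadd rounding forms follow the same mask conventions as the rest of the family: the writemask variants copy the untouched lane from `a` or `c`, the zeromask variants clear it. An illustrative sketch under the same assumptions:

#![feature(stdarch_x86_avx512)]

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// Scalar fused multiply-add with an explicit rounding mode. The `mask` variant
// leaves the low lane of `a` untouched when bit 0 of `k` is clear. Hypothetical
// helper, driven like the earlier sketches.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
fn fmadd_demo(a: f64, b: f64, c: f64, k: __mmask8) -> (f64, f64) {
    const RN: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let (va, vb, vc) = (_mm_set_sd(a), _mm_set_sd(b), _mm_set_sd(c));
    let full = _mm_fmadd_round_sd::<{ RN }>(va, vb, vc); // a * b + c
    let masked = _mm_mask_fmadd_round_sd::<{ RN }>(va, k, vb, vc); // a when k bit 0 is 0
    (_mm_cvtsd_f64(full), _mm_cvtsd_f64(masked))
}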
Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38042,14 +39918,16 @@ pub unsafe fn _mm_mask3_fmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fmsub) +pub fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fmsub) + } } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38067,21 +39945,23 @@ pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fmsub_round_ss( +pub fn _mm_mask_fmsub_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fmsub: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38099,22 +39979,24 @@ pub unsafe fn _mm_mask_fmsub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fmsub_round_ss( +pub fn _mm_maskz_fmsub_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fmsub: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38132,21 +40014,23 @@ pub unsafe fn _mm_maskz_fmsub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fmsub_round_ss( +pub fn _mm_mask3_fmsub_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fmsub: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extractb: f32 = simd_extract!(b, 0); - let extractc = -fmsub; - fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(c, 0, fmsub) } - simd_insert!(c, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38164,18 +40048,16 @@ pub unsafe fn _mm_mask3_fmsub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fmsub_round_sd( - a: __m128d, - b: __m128d, - c: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fmsub) +pub fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fmsub) + } } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38193,21 +40075,23 @@ pub unsafe fn _mm_fmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fmsub_round_sd( +pub fn _mm_mask_fmsub_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fmsub: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38225,22 +40109,24 @@ pub unsafe fn _mm_mask_fmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fmsub_round_sd( +pub fn _mm_maskz_fmsub_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fmsub: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38258,21 +40144,23 @@ pub unsafe fn _mm_maskz_fmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fmsub_round_sd( +pub fn _mm_mask3_fmsub_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fmsub: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extractb: f64 = simd_extract!(b, 0); - let extractc = -fmsub; - fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extractb: f64 = simd_extract!(b, 0); + let extractc = -fmsub; + fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(c, 0, fmsub) } - simd_insert!(c, 0, fmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38290,14 +40178,16 @@ pub unsafe fn _mm_mask3_fmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fnmadd) +pub fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fnmadd) + } } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38315,21 +40205,23 @@ pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fnmadd_round_ss( +pub fn _mm_mask_fnmadd_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmadd; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmadd; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38347,22 +40239,24 @@ pub unsafe fn _mm_mask_fnmadd_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fnmadd_round_ss( +pub fn _mm_maskz_fnmadd_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38380,21 +40274,23 @@ pub unsafe fn _mm_maskz_fnmadd_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fnmadd_round_ss( +pub fn _mm_mask3_fnmadd_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING); + } + simd_insert!(c, 0, fnmadd) } - simd_insert!(c, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38412,18 +40308,16 @@ pub unsafe fn _mm_mask3_fnmadd_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fnmadd_round_sd( - a: __m128d, - b: __m128d, - c: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fnmadd) +pub fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fnmadd) + } } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38441,21 +40335,23 @@ pub unsafe fn _mm_fnmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fnmadd_round_sd( +pub fn _mm_mask_fnmadd_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmadd; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmadd; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. 
Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38473,22 +40369,24 @@ pub unsafe fn _mm_mask_fnmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fnmadd_round_sd( +pub fn _mm_maskz_fnmadd_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38506,21 +40404,23 @@ pub unsafe fn _mm_maskz_fnmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fnmadd_round_sd( +pub fn _mm_mask3_fnmadd_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING); + } + simd_insert!(c, 0, fnmadd) } - simd_insert!(c, 0, fnmadd) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38538,15 +40438,17 @@ pub unsafe fn _mm_mask3_fnmadd_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - static_assert_rounding!(ROUNDING); - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fnmsub) +pub fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: 
__m128) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fnmsub) + } } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38564,22 +40466,24 @@ pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fnmsub_round_ss( +pub fn _mm_mask_fnmsub_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f32 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmsub; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f32 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmsub; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -38597,23 +40501,25 @@ pub unsafe fn _mm_mask_fnmsub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fnmsub_round_ss( +pub fn _mm_maskz_fnmsub_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f32 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc: f32 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f32 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc: f32 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\ @@ -38631,22 +40537,24 @@ pub unsafe fn _mm_maskz_fnmsub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fnmsub_round_ss( +pub fn _mm_mask3_fnmsub_round_ss( a: __m128, b: __m128, c: __m128, k: __mmask8, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f32 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f32 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f32 = simd_extract!(b, 0); - let extractc = -fnmsub; - fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f32 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f32 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f32 = simd_extract!(b, 0); + let extractc = -fnmsub; + fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(c, 0, fnmsub) } - simd_insert!(c, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -38664,19 +40572,17 @@ pub unsafe fn _mm_mask3_fnmsub_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fnmsub_round_sd( - a: __m128d, - b: __m128d, - c: __m128d, -) -> __m128d { - static_assert_rounding!(ROUNDING); - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, fnmsub) +pub fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, fnmsub) + } } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38694,22 +40600,24 @@ pub unsafe fn _mm_fnmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fnmsub_round_sd( +pub fn _mm_mask_fnmsub_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f64 = simd_extract!(a, 0); - if (k & 0b00000001) != 0 { - let extracta = -fnmsub; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f64 = simd_extract!(a, 0); + if (k & 0b00000001) != 0 { + let extracta = -fnmsub; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ @@ -38727,23 +40635,25 @@ pub unsafe fn _mm_mask_fnmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fnmsub_round_sd( +pub fn _mm_maskz_fnmsub_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128d, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f64 = 0.; - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc: f64 = simd_extract!(c, 0); - let extractc = -extractc; - fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f64 = 0.; + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc: f64 = simd_extract!(c, 0); + let extractc = -extractc; + fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. 
Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\ @@ -38761,22 +40671,24 @@ pub unsafe fn _mm_maskz_fnmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask3_fnmsub_round_sd( +pub fn _mm_mask3_fnmsub_round_sd( a: __m128d, b: __m128d, c: __m128d, k: __mmask8, ) -> __m128d { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f64 = simd_extract!(c, 0); - if (k & 0b00000001) != 0 { - let extracta: f64 = simd_extract!(a, 0); - let extracta = -extracta; - let extractb: f64 = simd_extract!(b, 0); - let extractc = -fnmsub; - fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f64 = simd_extract!(c, 0); + if (k & 0b00000001) != 0 { + let extracta: f64 = simd_extract!(a, 0); + let extracta = -extracta; + let extractb: f64 = simd_extract!(b, 0); + let extractc = -fnmsub; + fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(c, 0, fnmsub) } - simd_insert!(c, 0, fnmsub) } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting. @@ -38787,15 +40699,17 @@ pub unsafe fn _mm_mask3_fnmsub_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f32 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) +pub fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i) -> __m128 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting. 
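Editorial aside (not part of the patch): a minimal usage sketch of the new safe signature of _mm_mask3_fnmsub_round_sd converted above. It assumes x86_64, a nightly toolchain with the unstable stdarch_x86_avx512 feature and safe #[target_feature] functions, and an AVX-512F CPU at run time; the demo function name is illustrative only.

#![feature(stdarch_x86_avx512)] // still required at the time of this change
use std::arch::x86_64::*;

// Inside a function that itself enables avx512f, the converted intrinsic can
// now be called without an `unsafe` block.
#[target_feature(enable = "avx512f")]
fn mask3_fnmsub_demo() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // Mask bit 0 set: lane 0 becomes -(2.0 * 3.0) - 1.0 = -7.0.
    let r = _mm_mask3_fnmsub_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b1);
    assert_eq!(_mm_cvtsd_f64(r), -7.0);
    // Mask bit 0 clear: lane 0 is copied from c instead.
    let r = _mm_mask3_fnmsub_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b0);
    assert_eq!(_mm_cvtsd_f64(r), 1.0);
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        // Crossing into the target_feature function is still an unsafe call,
        // because `main` does not enable avx512f itself.
        unsafe { mask3_fnmsub_demo() }
    }
}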
@@ -38806,20 +40720,22 @@ pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fixupimm_ss( +pub fn _mm_mask_fixupimm_ss( a: __m128, k: __mmask8, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f32 = simd_extract!(fixupimm, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f32 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting. @@ -38830,20 +40746,22 @@ pub unsafe fn _mm_mask_fixupimm_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fixupimm_ss( +pub fn _mm_maskz_fixupimm_ss( k: __mmask8, a: __m128, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f32 = simd_extract!(fixupimm, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f32 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting. 
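The writemask/zeromask wording in these doc comments is easiest to see side by side. A hedged sketch under the same assumptions as the earlier aside (nightly, stdarch_x86_avx512, AVX-512F hardware; names illustrative); with mask bit 0 clear the fixup table is not consulted for lane 0, so only the mask behaviour is exercised here.

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn fixupimm_mask_demo() {
    let a = _mm_set_ss(5.0);
    let b = _mm_set_ss(3.0);
    let table = _mm_setzero_si128(); // all-zero per-class token table
    // Writemask with bit 0 clear: lane 0 is copied from a.
    let r = _mm_mask_fixupimm_ss::<0>(a, 0, b, table);
    assert_eq!(_mm_cvtss_f32(r), 5.0);
    // Zeromask with bit 0 clear: lane 0 is zeroed instead.
    let r = _mm_maskz_fixupimm_ss::<0>(0, a, b, table);
    assert_eq!(_mm_cvtss_f32(r), 0.0);
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        unsafe { fixupimm_mask_demo() }
    }
}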
@@ -38854,15 +40772,17 @@ pub unsafe fn _mm_maskz_fixupimm_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f64 = simd_extract!(fixupimm, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) +pub fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting. @@ -38873,20 +40793,22 @@ pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_fixupimm_sd( +pub fn _mm_mask_fixupimm_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f64 = simd_extract!(fixupimm, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting. 
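To make the shape of the change concrete: before this patch every call site needed its own unsafe block, while afterwards only the boundary into AVX-512F code does. A sketch under the same assumptions as above, using _mm_fixupimm_sd with an all-zero token table, which (for ordinary, finite inputs in b) leaves the lower element of a untouched.

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

// After this patch no `unsafe` is needed here: the enclosing function already
// guarantees that AVX-512F is available.
#[target_feature(enable = "avx512f")]
fn fixup_lower(a: __m128d, b: __m128d) -> __m128d {
    // imm8 = 0: no exception flags requested; the all-zero table keeps a's lane 0.
    _mm_fixupimm_sd::<0>(a, b, _mm_setzero_si128())
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        let a = _mm_set_sd(1.5);
        let b = _mm_set_sd(3.0);
        // The only remaining `unsafe` is the call into the target_feature fn.
        let r = unsafe { fixup_lower(a, b) };
        assert_eq!(_mm_cvtsd_f64(r), 1.5);
    }
}

The design point is the same across the whole file: the intrinsic bodies keep their internal unsafe blocks, but callers only pay for unsafety where the CPU-feature guarantee actually has to be established.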
@@ -38897,20 +40819,22 @@ pub unsafe fn _mm_mask_fixupimm_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_maskz_fixupimm_sd( +pub fn _mm_maskz_fixupimm_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); - let fixupimm: f64 = simd_extract!(fixupimm, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION); + let fixupimm: f64 = simd_extract!(fixupimm, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ @@ -38922,20 +40846,22 @@ pub unsafe fn _mm_maskz_fixupimm_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_fixupimm_round_ss( +pub fn _mm_fixupimm_round_ss( a: __m128, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE); - let fixupimm: f32 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
imm8 is used to set the required flags reporting.\ @@ -38947,21 +40873,23 @@ pub unsafe fn _mm_fixupimm_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_mask_fixupimm_round_ss( +pub fn _mm_mask_fixupimm_round_ss( a: __m128, k: __mmask8, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmss(a, b, c, IMM8, k, SAE); - let fixupimm: f32 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmss(a, b, c, IMM8, k, SAE); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ @@ -38973,21 +40901,23 @@ pub unsafe fn _mm_mask_fixupimm_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_maskz_fixupimm_round_ss( +pub fn _mm_maskz_fixupimm_round_ss( k: __mmask8, a: __m128, b: __m128, c: __m128i, ) -> __m128 { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let c = c.as_i32x4(); - let r = vfixupimmssz(a, b, c, IMM8, k, SAE); - let fixupimm: f32 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let c = c.as_i32x4(); + let r = vfixupimmssz(a, b, c, IMM8, k, SAE); + let fixupimm: f32 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
imm8 is used to set the required flags reporting.\ @@ -38999,20 +40929,22 @@ pub unsafe fn _mm_maskz_fixupimm_round_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] -pub unsafe fn _mm_fixupimm_round_sd( +pub fn _mm_fixupimm_round_sd( a: __m128d, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE); - let fixupimm: f64 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ @@ -39024,21 +40956,23 @@ pub unsafe fn _mm_fixupimm_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_mask_fixupimm_round_sd( +pub fn _mm_mask_fixupimm_round_sd( a: __m128d, k: __mmask8, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let r = vfixupimmsd(a, b, c, IMM8, k, SAE); - let fixupimm: f64 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmsd(a, b, c, IMM8, k, SAE); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
imm8 is used to set the required flags reporting.\ @@ -39050,21 +40984,23 @@ pub unsafe fn _mm_mask_fixupimm_round_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] -pub unsafe fn _mm_maskz_fixupimm_round_sd( +pub fn _mm_maskz_fixupimm_round_sd( k: __mmask8, a: __m128d, b: __m128d, c: __m128i, ) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let c = c.as_i64x2(); - let r = vfixupimmsdz(a, b, c, IMM8, k, SAE); - let fixupimm: f64 = simd_extract!(r, 0); - let r = simd_insert!(a, 0, fixupimm); - transmute(r) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let c = c.as_i64x2(); + let r = vfixupimmsdz(a, b, c, IMM8, k, SAE); + let fixupimm: f64 = simd_extract!(r, 0); + let r = simd_insert!(a, 0, fixupimm); + transmute(r) + } } /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. @@ -39074,31 +41010,35 @@ pub unsafe fn _mm_maskz_fixupimm_round_sd( #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2sd))] -pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d { - transmute(vcvtss2sd( - a.as_f64x2(), - b.as_f32x4(), - src.as_f64x2(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d { + unsafe { + transmute(vcvtss2sd( + a.as_f64x2(), + b.as_f32x4(), + src.as_f64x2(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } -/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. +/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2sd))] -pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d { - transmute(vcvtss2sd( - a.as_f64x2(), - b.as_f32x4(), - f64x2::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d { + unsafe { + transmute(vcvtss2sd( + a.as_f64x2(), + b.as_f32x4(), + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -39108,14 +41048,16 @@ pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2ss))] -pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 { - transmute(vcvtsd2ss( - a.as_f32x4(), - b.as_f64x2(), - src.as_f32x4(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 { + unsafe { + transmute(vcvtsd2ss( + a.as_f32x4(), + b.as_f64x2(), + src.as_f32x4(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. @@ -39125,75 +41067,79 @@ pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2ss))] -pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { - transmute(vcvtsd2ss( - a.as_f32x4(), - b.as_f64x2(), - f32x4::ZERO, - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { + unsafe { + transmute(vcvtsd2ss( + a.as_f32x4(), + b.as_f64x2(), + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f32x4(); - let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE); - transmute(r) +pub fn _mm_cvt_roundss_sd(a: __m128d, b: __m128) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE); + transmute(r) + } } /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. -/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_cvt_roundss_sd( +pub fn _mm_mask_cvt_roundss_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128, ) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f32x4(); - let src = src.as_f64x2(); - let r = vcvtss2sd(a, b, src, k, SAE); - transmute(r) + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let src = src.as_f64x2(); + let r = vcvtss2sd(a, b, src, k, SAE); + transmute(r) + } } /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
-/// +/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_cvt_roundss_sd( - k: __mmask8, - a: __m128d, - b: __m128, -) -> __m128d { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f32x4(); - let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE); - transmute(r) +pub fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f32x4(); + let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE); + transmute(r) + } } /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -39210,12 +41156,14 @@ pub unsafe fn _mm_maskz_cvt_roundss_sd( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f64x2(); - let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f64x2(); + let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); + transmute(r) + } } /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -39232,18 +41180,20 @@ pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_cvt_roundsd_ss( +pub fn _mm_mask_cvt_roundsd_ss( src: __m128, k: __mmask8, a: __m128, b: __m128d, ) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = b.as_f64x2(); - let src = src.as_f32x4(); - let r = vcvtsd2ss(a, b, src, k, ROUNDING); - transmute(r) + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f64x2(); + let src = src.as_f32x4(); + let r = vcvtsd2ss(a, b, src, k, ROUNDING); + transmute(r) + } } /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -39260,16 +41210,14 @@ pub unsafe fn _mm_mask_cvt_roundsd_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_cvt_roundsd_ss( - k: __mmask8, - a: __m128, - b: __m128d, -) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let b = 
b.as_f64x2(); - let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING); - transmute(r) +pub fn _mm_maskz_cvt_roundsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let b = b.as_f64x2(); + let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING); + transmute(r) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ @@ -39286,10 +41234,12 @@ pub unsafe fn _mm_maskz_cvt_roundsd_ss( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundss_si32(a: __m128) -> i32 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - vcvtss2si(a, ROUNDING) +pub fn _mm_cvt_roundss_si32(a: __m128) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ @@ -39306,10 +41256,12 @@ pub unsafe fn _mm_cvt_roundss_si32(a: __m128) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundss_i32(a: __m128) -> i32 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - vcvtss2si(a, ROUNDING) +pub fn _mm_cvt_roundss_i32(a: __m128) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\ @@ -39326,10 +41278,12 @@ pub unsafe fn _mm_cvt_roundss_i32(a: __m128) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundss_u32(a: __m128) -> u32 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - vcvtss2usi(a, ROUNDING) +pub fn _mm_cvt_roundss_u32(a: __m128) -> u32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2usi(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst. @@ -39339,8 +41293,8 @@ pub unsafe fn _mm_cvt_roundss_u32(a: __m128) -> u32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2si))] -pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 { - vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtss_i32(a: __m128) -> i32 { + unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst. 
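The rounding parameter on these scalar conversions is easiest to understand with a worked value. A sketch under the same assumptions as the earlier asides, showing _mm_cvt_roundss_si32 on 2.5 with three different rounding requests.

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn round_to_int_demo() {
    let a = _mm_set_ss(2.5);
    // Round to nearest, ties to even: 2.5 -> 2.
    assert_eq!(
        _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a),
        2
    );
    // Round toward +infinity: 2.5 -> 3.
    assert_eq!(
        _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a),
        3
    );
    // Round toward zero: 2.5 -> 2.
    assert_eq!(
        _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a),
        2
    );
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        unsafe { round_to_int_demo() }
    }
}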
@@ -39350,8 +41304,8 @@ pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2usi))] -pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 { - vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtss_u32(a: __m128) -> u32 { + unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ @@ -39368,10 +41322,12 @@ pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d) -> i32 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - vcvtsd2si(a, ROUNDING) +pub fn _mm_cvt_roundsd_si32(a: __m128d) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ @@ -39388,10 +41344,12 @@ pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d) -> i32 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - vcvtsd2si(a, ROUNDING) +pub fn _mm_cvt_roundsd_i32(a: __m128d) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si(a, ROUNDING) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\ @@ -39408,10 +41366,12 @@ pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d) -> u32 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - vcvtsd2usi(a, ROUNDING) +pub fn _mm_cvt_roundsd_u32(a: __m128d) -> u32 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2usi(a, ROUNDING) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst. @@ -39421,8 +41381,8 @@ pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d) -> u32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2si))] -pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 { - vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsd_i32(a: __m128d) -> i32 { + unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst. 
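The unsigned variants accept values that the signed conversions cannot represent. A brief sketch, same assumptions as above, using a value above i32::MAX that still fits a u32.

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn unsigned_convert_demo() {
    // 3e9 does not fit in an i32 but does fit in a u32.
    let a = _mm_set_sd(3_000_000_000.0);
    assert_eq!(_mm_cvtsd_u32(a), 3_000_000_000);
    assert_eq!(
        _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a),
        3_000_000_000
    );
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        unsafe { unsigned_convert_demo() }
    }
}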
@@ -39432,8 +41392,8 @@ pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2usi))] -pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 { - vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsd_u32(a: __m128d) -> u32 { + unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -39451,11 +41411,13 @@ pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let r = vcvtsi2ss(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundi32_ss(a: __m128, b: i32) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss(a, b, ROUNDING); + transmute(r) + } } /// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -39473,11 +41435,13 @@ pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let r = vcvtsi2ss(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundsi32_ss(a: __m128, b: i32) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss(a, b, ROUNDING); + transmute(r) + } } /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -39494,11 +41458,13 @@ pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let r = vcvtusi2ss(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtusi2ss(a, b, ROUNDING); + transmute(r) + } } /// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
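Going the other way, the integer-to-float conversions also honour the rounding override. A sketch, same assumptions as above: u32::MAX is not exactly representable as an f32, so the chosen rounding mode decides which neighbouring value is produced.

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn u32_to_f32_demo() {
    let a = _mm_setzero_ps();
    // Round to nearest picks the neighbour above (2^32)...
    let up = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, u32::MAX);
    assert_eq!(_mm_cvtss_f32(up), 4294967296.0);
    // ...while round toward zero picks the neighbour below (2^32 - 256).
    let down = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, u32::MAX);
    assert_eq!(_mm_cvtss_f32(down), 4294967040.0);
}

fn main() {
    if is_x86_feature_detected!("avx512f") {
        unsafe { u32_to_f32_demo() }
    }
}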
@@ -39508,9 +41474,11 @@ pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32) -> __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2ss))] -pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 { - let b = b as f32; - simd_insert!(a, 0, b) +pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } } /// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. @@ -39520,9 +41488,11 @@ pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2sd))] -pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { - let b = b as f64; - simd_insert!(a, 0, b) +pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39534,10 +41504,12 @@ pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - vcvttss2si(a, SAE) +pub fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si(a, SAE) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39549,10 +41521,12 @@ pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - vcvttss2si(a, SAE) +pub fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si(a, SAE) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ @@ -39564,10 +41538,12 @@ pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - vcvttss2usi(a, SAE) +pub fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2usi(a, SAE) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. 
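A sketch of a round trip through the low lane using the truncating form, which takes an SAE (suppress-all-exceptions) flag rather than a full rounding mode (same nightly feature assumptions as above):

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn roundtrip_i32(x: i32) -> i32 {
    // Insert x (as f32) into the low lane; upper lanes copied from the zero vector.
    let v = _mm_cvti32_ss(_mm_setzero_ps(), x);
    // Read it back, truncating toward zero with exceptions suppressed.
    _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(v)
}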
@@ -39577,8 +41553,8 @@ pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2si))] -pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { - vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttss_i32(a: __m128) -> i32 { + unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. @@ -39588,8 +41564,8 @@ pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2usi))] -pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { - vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttss_u32(a: __m128) -> u32 { + unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39601,10 +41577,12 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - vcvttsd2si(a, SAE) +pub fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si(a, SAE) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39616,10 +41594,12 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - vcvttsd2si(a, SAE) +pub fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si(a, SAE) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ @@ -39631,10 +41611,12 @@ pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - vcvttsd2usi(a, SAE) +pub fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2usi(a, SAE) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. 
@@ -39644,8 +41626,8 @@ pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2si))] -pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { - vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsd_i32(a: __m128d) -> i32 { + unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. @@ -39655,8 +41637,8 @@ pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2usi))] -pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 { - vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsd_u32(a: __m128d) -> u32 { + unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -39666,9 +41648,11 @@ pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2ss))] -pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { - let b = b as f32; - simd_insert!(a, 0, b) +pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } } /// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
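The unsigned scalar conversions follow the same shape; a brief sketch under the same assumptions:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
fn u32_roundtrip(x: u32) -> u32 {
    // u32 into the low f32 lane, then back out with truncation.
    let v = _mm_cvtu32_ss(_mm_setzero_ps(), x);
    _mm_cvttss_u32(v)
}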
@@ -39678,9 +41662,11 @@ pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2sd))] -pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d { - let b = b as f64; - simd_insert!(a, 0, b) +pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } } /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ @@ -39692,12 +41678,14 @@ pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128) -> i32 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let a = a.as_f32x4(); - let b = b.as_f32x4(); - vcomiss(a, b, IMM5, SAE) +pub fn _mm_comi_round_ss(a: __m128, b: __m128) -> i32 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f32x4(); + let b = b.as_f32x4(); + vcomiss(a, b, IMM5, SAE) + } } /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ @@ -39709,12 +41697,14 @@ pub unsafe fn _mm_comi_round_ss(a: __m128, b: _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd #[rustc_legacy_const_generics(2, 3)] -pub unsafe fn _mm_comi_round_sd(a: __m128d, b: __m128d) -> i32 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_mantissas_sae!(SAE); - let a = a.as_f64x2(); - let b = b.as_f64x2(); - vcomisd(a, b, IMM5, SAE) +pub fn _mm_comi_round_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_mantissas_sae!(SAE); + let a = a.as_f64x2(); + let b = b.as_f64x2(); + vcomisd(a, b, IMM5, SAE) + } } /// Equal diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs index 20dace5e9c..e9dc88f87f 100644 --- a/crates/core_arch/src/x86/avx512fp16.rs +++ b/crates/core_arch/src/x86/avx512fp16.rs @@ -9,7 +9,7 @@ use crate::ptr; #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_set_ph( +pub fn _mm_set_ph( e7: f16, e6: f16, e5: f16, @@ -28,7 +28,7 @@ pub unsafe fn _mm_set_ph( #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_set_ph( +pub fn _mm256_set_ph( e15: f16, e14: f16, e13: f16, @@ -57,7 +57,7 @@ pub unsafe fn _mm256_set_ph( #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_set_ph( +pub fn _mm512_set_ph( e31: f16, e30: f16, e29: f16, @@ -104,7 +104,7 @@ pub unsafe fn _mm512_set_ph( #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_set_sh(a: f16) -> __m128h { +pub fn _mm_set_sh(a: f16) -> __m128h { __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) } @@ -114,8 +114,8 @@ pub unsafe fn _mm_set_sh(a: f16) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] 
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_set1_ph(a: f16) -> __m128h { - transmute(f16x8::splat(a)) +pub fn _mm_set1_ph(a: f16) -> __m128h { + unsafe { transmute(f16x8::splat(a)) } } /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. @@ -124,8 +124,8 @@ pub unsafe fn _mm_set1_ph(a: f16) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_set1_ph(a: f16) -> __m256h { - transmute(f16x16::splat(a)) +pub fn _mm256_set1_ph(a: f16) -> __m256h { + unsafe { transmute(f16x16::splat(a)) } } /// Broadcast the half-precision (16-bit) floating-point value a to all elements of dst. @@ -134,8 +134,8 @@ pub unsafe fn _mm256_set1_ph(a: f16) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_set1_ph(a: f16) -> __m512h { - transmute(f16x32::splat(a)) +pub fn _mm512_set1_ph(a: f16) -> __m512h { + unsafe { transmute(f16x32::splat(a)) } } /// Set packed half-precision (16-bit) floating-point elements in dst with the supplied values in reverse order. @@ -144,7 +144,7 @@ pub unsafe fn _mm512_set1_ph(a: f16) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_setr_ph( +pub fn _mm_setr_ph( e0: f16, e1: f16, e2: f16, @@ -163,7 +163,7 @@ pub unsafe fn _mm_setr_ph( #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_setr_ph( +pub fn _mm256_setr_ph( e0: f16, e1: f16, e2: f16, @@ -192,7 +192,7 @@ pub unsafe fn _mm256_setr_ph( #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_setr_ph( +pub fn _mm512_setr_ph( e0: f16, e1: f16, e2: f16, @@ -238,8 +238,8 @@ pub unsafe fn _mm512_setr_ph( #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_setzero_ph() -> __m128h { - transmute(f16x8::ZERO) +pub fn _mm_setzero_ph() -> __m128h { + unsafe { transmute(f16x8::ZERO) } } /// Return vector of type __m256h with all elements set to zero. @@ -248,8 +248,8 @@ pub unsafe fn _mm_setzero_ph() -> __m128h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_setzero_ph() -> __m256h { - transmute(f16x16::ZERO) +pub fn _mm256_setzero_ph() -> __m256h { + unsafe { transmute(f16x16::ZERO) } } /// Return vector of type __m512h with all elements set to zero. @@ -258,8 +258,8 @@ pub unsafe fn _mm256_setzero_ph() -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_setzero_ph() -> __m512h { - transmute(f16x32::ZERO) +pub fn _mm512_setzero_ph() -> __m512h { + unsafe { transmute(f16x32::ZERO) } } /// Return vector of type `__m128h` with undefined elements. 
In practice, this returns the all-zero @@ -269,8 +269,8 @@ pub unsafe fn _mm512_setzero_ph() -> __m512h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_undefined_ph() -> __m128h { - transmute(f16x8::ZERO) +pub fn _mm_undefined_ph() -> __m128h { + unsafe { transmute(f16x8::ZERO) } } /// Return vector of type `__m256h` with undefined elements. In practice, this returns the all-zero @@ -280,8 +280,8 @@ pub unsafe fn _mm_undefined_ph() -> __m128h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_undefined_ph() -> __m256h { - transmute(f16x16::ZERO) +pub fn _mm256_undefined_ph() -> __m256h { + unsafe { transmute(f16x16::ZERO) } } /// Return vector of type `__m512h` with undefined elements. In practice, this returns the all-zero @@ -291,8 +291,8 @@ pub unsafe fn _mm256_undefined_ph() -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_undefined_ph() -> __m512h { - transmute(f16x32::ZERO) +pub fn _mm512_undefined_ph() -> __m512h { + unsafe { transmute(f16x32::ZERO) } } /// Cast vector of type `__m128d` to type `__m128h`. This intrinsic is only used for compilation and @@ -302,8 +302,8 @@ pub unsafe fn _mm512_undefined_ph() -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_castpd_ph(a: __m128d) -> __m128h { - transmute(a) +pub fn _mm_castpd_ph(a: __m128d) -> __m128h { + unsafe { transmute(a) } } /// Cast vector of type `__m256d` to type `__m256h`. This intrinsic is only used for compilation and @@ -313,8 +313,8 @@ pub unsafe fn _mm_castpd_ph(a: __m128d) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castpd_ph(a: __m256d) -> __m256h { - transmute(a) +pub fn _mm256_castpd_ph(a: __m256d) -> __m256h { + unsafe { transmute(a) } } /// Cast vector of type `__m512d` to type `__m512h`. This intrinsic is only used for compilation and @@ -324,8 +324,8 @@ pub unsafe fn _mm256_castpd_ph(a: __m256d) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castpd_ph(a: __m512d) -> __m512h { - transmute(a) +pub fn _mm512_castpd_ph(a: __m512d) -> __m512h { + unsafe { transmute(a) } } /// Cast vector of type `__m128h` to type `__m128d`. This intrinsic is only used for compilation and @@ -335,8 +335,8 @@ pub unsafe fn _mm512_castpd_ph(a: __m512d) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_castph_pd(a: __m128h) -> __m128d { - transmute(a) +pub fn _mm_castph_pd(a: __m128h) -> __m128d { + unsafe { transmute(a) } } /// Cast vector of type `__m256h` to type `__m256d`. This intrinsic is only used for compilation and @@ -346,8 +346,8 @@ pub unsafe fn _mm_castph_pd(a: __m128h) -> __m128d { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castph_pd(a: __m256h) -> __m256d { - transmute(a) +pub fn _mm256_castph_pd(a: __m256h) -> __m256d { + unsafe { transmute(a) } } /// Cast vector of type `__m512h` to type `__m512d`. 
This intrinsic is only used for compilation and @@ -357,8 +357,8 @@ pub unsafe fn _mm256_castph_pd(a: __m256h) -> __m256d { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph_pd(a: __m512h) -> __m512d { - transmute(a) +pub fn _mm512_castph_pd(a: __m512h) -> __m512d { + unsafe { transmute(a) } } /// Cast vector of type `__m128` to type `__m128h`. This intrinsic is only used for compilation and @@ -368,8 +368,8 @@ pub unsafe fn _mm512_castph_pd(a: __m512h) -> __m512d { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_castps_ph(a: __m128) -> __m128h { - transmute(a) +pub fn _mm_castps_ph(a: __m128) -> __m128h { + unsafe { transmute(a) } } /// Cast vector of type `__m256` to type `__m256h`. This intrinsic is only used for compilation and @@ -379,8 +379,8 @@ pub unsafe fn _mm_castps_ph(a: __m128) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castps_ph(a: __m256) -> __m256h { - transmute(a) +pub fn _mm256_castps_ph(a: __m256) -> __m256h { + unsafe { transmute(a) } } /// Cast vector of type `__m512` to type `__m512h`. This intrinsic is only used for compilation and @@ -390,8 +390,8 @@ pub unsafe fn _mm256_castps_ph(a: __m256) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castps_ph(a: __m512) -> __m512h { - transmute(a) +pub fn _mm512_castps_ph(a: __m512) -> __m512h { + unsafe { transmute(a) } } /// Cast vector of type `__m128h` to type `__m128`. This intrinsic is only used for compilation and @@ -401,8 +401,8 @@ pub unsafe fn _mm512_castps_ph(a: __m512) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_castph_ps(a: __m128h) -> __m128 { - transmute(a) +pub fn _mm_castph_ps(a: __m128h) -> __m128 { + unsafe { transmute(a) } } /// Cast vector of type `__m256h` to type `__m256`. This intrinsic is only used for compilation and @@ -412,8 +412,8 @@ pub unsafe fn _mm_castph_ps(a: __m128h) -> __m128 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castph_ps(a: __m256h) -> __m256 { - transmute(a) +pub fn _mm256_castph_ps(a: __m256h) -> __m256 { + unsafe { transmute(a) } } /// Cast vector of type `__m512h` to type `__m512`. This intrinsic is only used for compilation and @@ -423,8 +423,8 @@ pub unsafe fn _mm256_castph_ps(a: __m256h) -> __m256 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph_ps(a: __m512h) -> __m512 { - transmute(a) +pub fn _mm512_castph_ps(a: __m512h) -> __m512 { + unsafe { transmute(a) } } /// Cast vector of type `__m128i` to type `__m128h`. This intrinsic is only used for compilation and @@ -434,8 +434,8 @@ pub unsafe fn _mm512_castph_ps(a: __m512h) -> __m512 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_castsi128_ph(a: __m128i) -> __m128h { - transmute(a) +pub fn _mm_castsi128_ph(a: __m128i) -> __m128h { + unsafe { transmute(a) } } /// Cast vector of type `__m256i` to type `__m256h`. 
This intrinsic is only used for compilation and @@ -445,8 +445,8 @@ pub unsafe fn _mm_castsi128_ph(a: __m128i) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castsi256_ph(a: __m256i) -> __m256h { - transmute(a) +pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h { + unsafe { transmute(a) } } /// Cast vector of type `__m512i` to type `__m512h`. This intrinsic is only used for compilation and @@ -456,8 +456,8 @@ pub unsafe fn _mm256_castsi256_ph(a: __m256i) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castsi512_ph(a: __m512i) -> __m512h { - transmute(a) +pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h { + unsafe { transmute(a) } } /// Cast vector of type `__m128h` to type `__m128i`. This intrinsic is only used for compilation and @@ -467,8 +467,8 @@ pub unsafe fn _mm512_castsi512_ph(a: __m512i) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_castph_si128(a: __m128h) -> __m128i { - transmute(a) +pub fn _mm_castph_si128(a: __m128h) -> __m128i { + unsafe { transmute(a) } } /// Cast vector of type `__m256h` to type `__m256i`. This intrinsic is only used for compilation and @@ -478,8 +478,8 @@ pub unsafe fn _mm_castph_si128(a: __m128h) -> __m128i { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castph_si256(a: __m256h) -> __m256i { - transmute(a) +pub fn _mm256_castph_si256(a: __m256h) -> __m256i { + unsafe { transmute(a) } } /// Cast vector of type `__m512h` to type `__m512i`. This intrinsic is only used for compilation and @@ -489,8 +489,8 @@ pub unsafe fn _mm256_castph_si256(a: __m256h) -> __m256i { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph_si512(a: __m512h) -> __m512i { - transmute(a) +pub fn _mm512_castph_si512(a: __m512h) -> __m512i { + unsafe { transmute(a) } } /// Cast vector of type `__m256h` to type `__m128h`. This intrinsic is only used for compilation and @@ -500,8 +500,8 @@ pub unsafe fn _mm512_castph_si512(a: __m512h) -> __m512i { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castph256_ph128(a: __m256h) -> __m128h { - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) +pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } /// Cast vector of type `__m512h` to type `__m128h`. This intrinsic is only used for compilation and @@ -511,8 +511,8 @@ pub unsafe fn _mm256_castph256_ph128(a: __m256h) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph512_ph128(a: __m512h) -> __m128h { - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) +pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) } } /// Cast vector of type `__m512h` to type `__m256h`. 
This intrinsic is only used for compilation and @@ -522,8 +522,8 @@ pub unsafe fn _mm512_castph512_ph128(a: __m512h) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph512_ph256(a: __m512h) -> __m256h { - simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h { + unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } } /// Cast vector of type `__m128h` to type `__m256h`. The upper 8 elements of the result are undefined. @@ -534,12 +534,14 @@ pub unsafe fn _mm512_castph512_ph256(a: __m512h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_castph128_ph256(a: __m128h) -> __m256h { - simd_shuffle!( - a, - _mm_undefined_ph(), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] - ) +pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h { + unsafe { + simd_shuffle!( + a, + _mm_undefined_ph(), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ) + } } /// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are undefined. @@ -550,15 +552,17 @@ pub unsafe fn _mm256_castph128_ph256(a: __m128h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph128_ph512(a: __m128h) -> __m512h { - simd_shuffle!( - a, - _mm_undefined_ph(), - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8 - ] - ) +pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm_undefined_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8 + ] + ) + } } /// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are undefined. @@ -569,15 +573,17 @@ pub unsafe fn _mm512_castph128_ph512(a: __m128h) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_castph256_ph512(a: __m256h) -> __m512h { - simd_shuffle!( - a, - _mm256_undefined_ph(), - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16 - ] - ) +pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm256_undefined_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16 + ] + ) + } } /// Cast vector of type `__m256h` to type `__m128h`. The upper 8 elements of the result are zeroed. @@ -588,12 +594,14 @@ pub unsafe fn _mm512_castph256_ph512(a: __m256h) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_zextph128_ph256(a: __m128h) -> __m256h { - simd_shuffle!( - a, - _mm_setzero_ph(), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] - ) +pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h { + unsafe { + simd_shuffle!( + a, + _mm_setzero_ph(), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ) + } } /// Cast vector of type `__m256h` to type `__m512h`. The upper 16 elements of the result are zeroed. 
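For the `avx512fp16` additions the same convention applies, but any example also needs the unstable `f16` primitive. A sketch that broadcasts a half-precision value, reinterprets it as raw integer bits, and zero-extends it to 256 bits (assumes nightly with `stdarch_x86_avx512_f16` and `f16`; names are illustrative):

#![feature(stdarch_x86_avx512_f16, f16)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn splat_and_widen(x: f16) -> __m256h {
    let v = _mm_set1_ph(x);                  // broadcast x to all 8 lanes
    let bits: __m128i = _mm_castph_si128(v); // bit-for-bit reinterpretation, no conversion
    let v = _mm_castsi128_ph(bits);          // and back, still just a cast
    _mm256_zextph128_ph256(v)                // upper 8 lanes zeroed
}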
@@ -604,15 +612,17 @@ pub unsafe fn _mm256_zextph128_ph256(a: __m128h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_zextph256_ph512(a: __m256h) -> __m512h { - simd_shuffle!( - a, - _mm256_setzero_ph(), - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16 - ] - ) +pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm256_setzero_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16 + ] + ) + } } /// Cast vector of type `__m128h` to type `__m512h`. The upper 24 elements of the result are zeroed. @@ -623,15 +633,17 @@ pub unsafe fn _mm512_zextph256_ph512(a: __m256h) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_zextph128_ph512(a: __m128h) -> __m512h { - simd_shuffle!( - a, - _mm_setzero_ph(), - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8 - ] - ) +pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h { + unsafe { + simd_shuffle!( + a, + _mm_setzero_ph(), + [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8 + ] + ) + } } macro_rules! cmp_asm { // FIXME: use LLVM intrinsics @@ -670,9 +682,11 @@ macro_rules! cmp_asm { // FIXME: use LLVM intrinsics #[target_feature(enable = "avx512fp16,avx512vl")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cmp_ph_mask(a: __m128h, b: __m128h) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask8, xmm_reg, a, b) +pub fn _mm_cmp_ph_mask(a: __m128h, b: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask8, xmm_reg, a, b) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -684,13 +698,11 @@ pub unsafe fn _mm_cmp_ph_mask(a: __m128h, b: __m128h) -> __mmas #[target_feature(enable = "avx512fp16,avx512vl")] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cmp_ph_mask( - k1: __mmask8, - a: __m128h, - b: __m128h, -) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask8, k1, xmm_reg, a, b) +pub fn _mm_mask_cmp_ph_mask(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask8, k1, xmm_reg, a, b) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -701,9 +713,11 @@ pub unsafe fn _mm_mask_cmp_ph_mask( #[target_feature(enable = "avx512fp16,avx512vl")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cmp_ph_mask(a: __m256h, b: __m256h) -> __mmask16 { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask16, ymm_reg, a, b) +pub fn _mm256_cmp_ph_mask(a: __m256h, b: __m256h) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask16, ymm_reg, a, b) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -715,13 +729,15 @@ pub unsafe fn _mm256_cmp_ph_mask(a: __m256h, b: __m256h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] 
#[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cmp_ph_mask( +pub fn _mm256_mask_cmp_ph_mask( k1: __mmask16, a: __m256h, b: __m256h, ) -> __mmask16 { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask16, k1, ymm_reg, a, b) + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask16, k1, ymm_reg, a, b) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -732,9 +748,11 @@ pub unsafe fn _mm256_mask_cmp_ph_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cmp_ph_mask(a: __m512h, b: __m512h) -> __mmask32 { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask32, zmm_reg, a, b) +pub fn _mm512_cmp_ph_mask(a: __m512h, b: __m512h) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask32, zmm_reg, a, b) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -746,13 +764,15 @@ pub unsafe fn _mm512_cmp_ph_mask(a: __m512h, b: __m512h) -> __m #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cmp_ph_mask( +pub fn _mm512_mask_cmp_ph_mask( k1: __mmask32, a: __m512h, b: __m512h, ) -> __mmask32 { - static_assert_uimm_bits!(IMM5, 5); - cmp_asm!(__mmask32, k1, zmm_reg, a, b) + unsafe { + static_assert_uimm_bits!(IMM5, 5); + cmp_asm!(__mmask32, k1, zmm_reg, a, b) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -765,25 +785,27 @@ pub unsafe fn _mm512_mask_cmp_ph_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cmp_round_ph_mask( +pub fn _mm512_cmp_round_ph_mask( a: __m512h, b: __m512h, ) -> __mmask32 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_sae!(SAE); - if SAE == _MM_FROUND_NO_EXC { - let dst: __mmask32; - asm!( - "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}", - k = lateout(kreg) dst, - a = in(zmm_reg) a, - b = in(zmm_reg) b, - imm8 = const IMM5, - options(pure, nomem, nostack) - ); - dst - } else { - cmp_asm!(__mmask32, zmm_reg, a, b) + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_sae!(SAE); + if SAE == _MM_FROUND_NO_EXC { + let dst: __mmask32; + asm!( + "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}", + k = lateout(kreg) dst, + a = in(zmm_reg) a, + b = in(zmm_reg) b, + imm8 = const IMM5, + options(pure, nomem, nostack) + ); + dst + } else { + cmp_asm!(__mmask32, zmm_reg, a, b) + } } } @@ -798,27 +820,29 @@ pub unsafe fn _mm512_cmp_round_ph_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cmp_round_ph_mask( +pub fn _mm512_mask_cmp_round_ph_mask( k1: __mmask32, a: __m512h, b: __m512h, ) -> __mmask32 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_sae!(SAE); - if SAE == _MM_FROUND_NO_EXC { - let dst: __mmask32; - asm!( - "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}", - k = lateout(kreg) dst, - k1 = in(kreg) k1, - a = in(zmm_reg) a, - b = in(zmm_reg) b, - imm8 = const IMM5, - options(pure, nomem, nostack) - ); - dst - } else { - cmp_asm!(__mmask32, k1, zmm_reg, a, b) + unsafe { + static_assert_uimm_bits!(IMM5, 5); 
+ static_assert_sae!(SAE); + if SAE == _MM_FROUND_NO_EXC { + let dst: __mmask32; + asm!( + "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}", + k = lateout(kreg) dst, + k1 = in(kreg) k1, + a = in(zmm_reg) a, + b = in(zmm_reg) b, + imm8 = const IMM5, + options(pure, nomem, nostack) + ); + dst + } else { + cmp_asm!(__mmask32, k1, zmm_reg, a, b) + } } } @@ -831,10 +855,7 @@ pub unsafe fn _mm512_mask_cmp_round_ph_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cmp_round_sh_mask( - a: __m128h, - b: __m128h, -) -> __mmask8 { +pub fn _mm_cmp_round_sh_mask(a: __m128h, b: __m128h) -> __mmask8 { static_assert_uimm_bits!(IMM5, 5); static_assert_sae!(SAE); _mm_mask_cmp_round_sh_mask::(0xff, a, b) @@ -849,14 +870,16 @@ pub unsafe fn _mm_cmp_round_sh_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cmp_round_sh_mask( +pub fn _mm_mask_cmp_round_sh_mask( k1: __mmask8, a: __m128h, b: __m128h, ) -> __mmask8 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_sae!(SAE); - vcmpsh(a, b, IMM5, k1, SAE) + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_sae!(SAE); + vcmpsh(a, b, IMM5, k1, SAE) + } } /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -867,7 +890,7 @@ pub unsafe fn _mm_mask_cmp_round_sh_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cmp_sh_mask(a: __m128h, b: __m128h) -> __mmask8 { +pub fn _mm_cmp_sh_mask(a: __m128h, b: __m128h) -> __mmask8 { static_assert_uimm_bits!(IMM5, 5); _mm_cmp_round_sh_mask::(a, b) } @@ -880,11 +903,7 @@ pub unsafe fn _mm_cmp_sh_mask(a: __m128h, b: __m128h) -> __mmas #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cmp_sh_mask( - k1: __mmask8, - a: __m128h, - b: __m128h, -) -> __mmask8 { +pub fn _mm_mask_cmp_sh_mask(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 { static_assert_uimm_bits!(IMM5, 5); _mm_mask_cmp_round_sh_mask::(k1, a, b) } @@ -898,10 +917,12 @@ pub unsafe fn _mm_mask_cmp_sh_mask( #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comi_round_sh(a: __m128h, b: __m128h) -> i32 { - static_assert_uimm_bits!(IMM5, 5); - static_assert_sae!(SAE); - vcomish(a, b, IMM5, SAE) +pub fn _mm_comi_round_sh(a: __m128h, b: __m128h) -> i32 { + unsafe { + static_assert_uimm_bits!(IMM5, 5); + static_assert_sae!(SAE); + vcomish(a, b, IMM5, SAE) + } } /// Compare the lower half-precision (16-bit) floating-point elements in a and b based on the comparison @@ -912,7 +933,7 @@ pub unsafe fn _mm_comi_round_sh(a: __m128h, b: #[target_feature(enable = "avx512fp16")] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comi_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comi_sh(a: __m128h, b: __m128h) -> i32 { static_assert_uimm_bits!(IMM5, 5); _mm_comi_round_sh::(a, b) } @@ -924,7 +945,7 @@ pub unsafe fn _mm_comi_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] 
-pub unsafe fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_EQ_OS>(a, b) } @@ -935,7 +956,7 @@ pub unsafe fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_GE_OS>(a, b) } @@ -946,7 +967,7 @@ pub unsafe fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_GT_OS>(a, b) } @@ -957,7 +978,7 @@ pub unsafe fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_LE_OS>(a, b) } @@ -968,7 +989,7 @@ pub unsafe fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_LT_OS>(a, b) } @@ -979,7 +1000,7 @@ pub unsafe fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_NEQ_OS>(a, b) } @@ -990,7 +1011,7 @@ pub unsafe fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_EQ_OQ>(a, b) } @@ -1001,7 +1022,7 @@ pub unsafe fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_GE_OQ>(a, b) } @@ -1012,7 +1033,7 @@ pub unsafe fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_GT_OQ>(a, b) } @@ -1023,7 +1044,7 @@ pub unsafe fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_LE_OQ>(a, b) } @@ -1034,7 +1055,7 @@ pub unsafe fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_ucomilt_sh(a: 
__m128h, b: __m128h) -> i32 { +pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_LT_OQ>(a, b) } @@ -1045,7 +1066,7 @@ pub unsafe fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 { +pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 { _mm_comi_sh::<_CMP_NEQ_OQ>(a, b) } @@ -1172,12 +1193,14 @@ pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let mut mov: f16 = simd_extract!(src, 0); - if (k & 1) != 0 { - mov = simd_extract!(b, 0); +pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let mut mov: f16 = simd_extract!(src, 0); + if (k & 1) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) } - simd_insert!(a, 0, mov) } /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst @@ -1188,12 +1211,14 @@ pub unsafe fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let mut mov: f16 = 0.; - if (k & 1) != 0 { - mov = simd_extract!(b, 0); +pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let mut mov: f16 = 0.; + if (k & 1) != 0 { + mov = simd_extract!(b, 0); + } + simd_insert!(a, 0, mov) } - simd_insert!(a, 0, mov) } /// Move the lower half-precision (16-bit) floating-point element from b to the lower element of dst, @@ -1203,9 +1228,11 @@ pub unsafe fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h { - let mov: f16 = simd_extract!(b, 0); - simd_insert!(a, 0, mov) +pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h { + unsafe { + let mov: f16 = simd_extract!(b, 0); + simd_insert!(a, 0, mov) + } } /// Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from a into memory. 
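A sketch combining the mask-producing compare with the masked scalar move shown above (same nightly `f16` assumptions):

#![feature(stdarch_x86_avx512_f16, f16)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn select_low_if_lt(a: __m128h, b: __m128h) -> __m128h {
    // One mask bit per lane where a < b (ordered, non-signaling).
    let k: __mmask8 = _mm_cmp_ph_mask::<_CMP_LT_OQ>(a, b);
    // Low lane taken from b only if its mask bit is set, zeroed otherwise;
    // the upper 7 lanes are copied from a.
    _mm_maskz_move_sh(k, a, b)
}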
@@ -1307,8 +1334,8 @@ pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { - simd_add(a, b) +pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_add(a, b) } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1319,9 +1346,11 @@ pub unsafe fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_add_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_add_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1332,9 +1361,11 @@ pub unsafe fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_add_ph(a, b); - simd_select_bitmask(k, r, _mm_setzero_ph()) +pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_add_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
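A sketch of the plain, merge-masked, and zero-masked forms of the 128-bit add (nightly `f16` assumptions as above):

#![feature(stdarch_x86_avx512_f16, f16)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn adds(src: __m128h, a: __m128h, b: __m128h) -> (__m128h, __m128h, __m128h) {
    let all = _mm_add_ph(a, b);                            // every lane is a + b
    let merged = _mm_mask_add_ph(src, 0b0101_0101, a, b);  // even lanes a + b, odd lanes kept from src
    let zeroed = _mm_maskz_add_ph(0b0000_1111, a, b);      // low 4 lanes a + b, upper 4 lanes 0.0
    (all, merged, zeroed)
}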
@@ -1344,8 +1375,8 @@ pub unsafe fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { - simd_add(a, b) +pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_add(a, b) } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1356,9 +1387,11 @@ pub unsafe fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_add_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_add_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1369,9 +1402,11 @@ pub unsafe fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_add_ph(a, b); - simd_select_bitmask(k, r, _mm256_setzero_ph()) +pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_add_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
@@ -1381,8 +1416,8 @@ pub unsafe fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { - simd_add(a, b) +pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_add(a, b) } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1393,9 +1428,11 @@ pub unsafe fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_add_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_add_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1406,9 +1443,11 @@ pub unsafe fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vaddph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_add_ph(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) +pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_add_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
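At the call site the only remaining `unsafe` is the jump into the `#[target_feature]` world after a runtime check. A sketch, assuming the toolchain's `is_x86_feature_detected!` recognizes the `avx512fp16` string:

#![feature(stdarch_x86_avx512_f16, f16)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
fn add_zmm(a: __m512h, b: __m512h) -> __m512h {
    _mm512_add_ph(a, b)
}

fn add_if_supported(a: __m512h, b: __m512h) -> Option<__m512h> {
    if is_x86_feature_detected!("avx512fp16") {
        // Safe to call: the CPU check above upholds the target-feature contract.
        Some(unsafe { add_zmm(a, b) })
    } else {
        None
    }
}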
@@ -1426,9 +1465,11 @@ pub unsafe fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_add_round_ph(a: __m512h, b: __m512h) -> __m512h { - static_assert_rounding!(ROUNDING); - vaddph(a, b, ROUNDING) +pub fn _mm512_add_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vaddph(a, b, ROUNDING) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1447,15 +1488,17 @@ pub unsafe fn _mm512_add_round_ph(a: __m512h, b: __m512h) - #[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_add_round_ph( +pub fn _mm512_mask_add_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_add_round_ph::(a, b); - simd_select_bitmask(k, r, src) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_add_round_ph::(a, b); + simd_select_bitmask(k, r, src) + } } /// Add packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1473,14 +1516,16 @@ pub unsafe fn _mm512_mask_add_round_ph( #[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_add_round_ph( +pub fn _mm512_maskz_add_round_ph( k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_add_round_ph::(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_add_round_ph::(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -1499,7 +1544,7 @@ pub unsafe fn _mm512_maskz_add_round_ph( #[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_add_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_add_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_add_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -1521,14 +1566,16 @@ pub unsafe fn _mm_add_round_sh(a: __m128h, b: __m128h) -> _ #[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_add_round_sh( +pub fn _mm_mask_add_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vaddsh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vaddsh(a, b, src, k, ROUNDING) + } } /// Add the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -1548,11 +1595,7 @@ pub unsafe fn _mm_mask_add_round_sh( #[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_add_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_add_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { 
static_assert_rounding!(ROUNDING); _mm_mask_add_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -1565,7 +1608,7 @@ pub unsafe fn _mm_maskz_add_round_sh( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vaddsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { _mm_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) } @@ -1578,7 +1621,7 @@ pub unsafe fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vaddsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -1591,7 +1634,7 @@ pub unsafe fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vaddsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_add_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) } @@ -1602,8 +1645,8 @@ pub unsafe fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { - simd_sub(a, b) +pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_sub(a, b) } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1614,9 +1657,11 @@ pub unsafe fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_sub_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_sub_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1627,9 +1672,11 @@ pub unsafe fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_sub_ph(a, b); - simd_select_bitmask(k, r, _mm_setzero_ph()) +pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_sub_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. 
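The scalar `sh` forms take the same rounding const generic; a sketch of the merge-masked variant rounding toward zero (nightly `f16` assumptions as above):

#![feature(stdarch_x86_avx512_f16, f16)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
fn add_low_toward_zero(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
    // Low lane: a0 + b0 rounded toward zero if mask bit 0 is set, else src's low lane;
    // the upper 7 lanes are copied from a.
    _mm_mask_add_round_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, k, a, b)
}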
@@ -1639,8 +1686,8 @@ pub unsafe fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { - simd_sub(a, b) +pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_sub(a, b) } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1651,9 +1698,11 @@ pub unsafe fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_sub_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_sub_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1664,9 +1713,11 @@ pub unsafe fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_sub_ph(a, b); - simd_select_bitmask(k, r, _mm256_setzero_ph()) +pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_sub_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. 
@@ -1676,8 +1727,8 @@ pub unsafe fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { - simd_sub(a, b) +pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_sub(a, b) } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1688,9 +1739,11 @@ pub unsafe fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_sub_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_sub_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1701,9 +1754,11 @@ pub unsafe fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsubph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_sub_ph(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) +pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_sub_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst. 
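Illustrative sketch, not part of the patch: the `mask_`/`maskz_` pairs above differ only in what fills the deselected lanes — the `src` operand versus zero. A hypothetical helper contrasting the two, under the same nightly-feature assumptions as the previous sketch.

use core::arch::x86_64::{__m512h, __mmask32, _mm512_mask_sub_ph, _mm512_maskz_sub_ph};

#[target_feature(enable = "avx512fp16")]
fn masked_sub_both(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> (__m512h, __m512h) {
    // Merge-masking: lanes whose bit in `k` is 0 keep the value from `src`.
    let merged = _mm512_mask_sub_ph(src, k, a, b);
    // Zero-masking: lanes whose bit in `k` is 0 are set to 0.0.
    let zeroed = _mm512_maskz_sub_ph(k, a, b);
    (merged, zeroed)
}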
@@ -1721,9 +1776,11 @@ pub unsafe fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_sub_round_ph(a: __m512h, b: __m512h) -> __m512h { - static_assert_rounding!(ROUNDING); - vsubph(a, b, ROUNDING) +pub fn _mm512_sub_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vsubph(a, b, ROUNDING) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1742,15 +1799,17 @@ pub unsafe fn _mm512_sub_round_ph(a: __m512h, b: __m512h) - #[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_sub_round_ph( +pub fn _mm512_mask_sub_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_sub_round_ph::(a, b); - simd_select_bitmask(k, r, src) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_sub_round_ph::(a, b); + simd_select_bitmask(k, r, src) + } } /// Subtract packed half-precision (16-bit) floating-point elements in b from a, and store the results in dst using @@ -1769,14 +1828,16 @@ pub unsafe fn _mm512_mask_sub_round_ph( #[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_sub_round_ph( +pub fn _mm512_maskz_sub_round_ph( k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_sub_round_ph::(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_sub_round_ph::(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the @@ -1795,7 +1856,7 @@ pub unsafe fn _mm512_maskz_sub_round_ph( #[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_sub_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_sub_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_sub_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -1817,14 +1878,16 @@ pub unsafe fn _mm_sub_round_sh(a: __m128h, b: __m128h) -> _ #[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_sub_round_sh( +pub fn _mm_mask_sub_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vsubsh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vsubsh(a, b, src, k, ROUNDING) + } } /// Subtract the lower half-precision (16-bit) floating-point elements in b from a, store the result in the @@ -1844,11 +1907,7 @@ pub unsafe fn _mm_mask_sub_round_sh( #[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_sub_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_sub_round_sh(k: __mmask8, a: __m128h, 
b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_sub_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -1861,7 +1920,7 @@ pub unsafe fn _mm_maskz_sub_round_sh( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsubsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { _mm_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) } @@ -1874,7 +1933,7 @@ pub unsafe fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsubsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -1887,7 +1946,7 @@ pub unsafe fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsubsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_sub_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) } @@ -1898,8 +1957,8 @@ pub unsafe fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { - simd_mul(a, b) +pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_mul(a, b) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1910,9 +1969,11 @@ pub unsafe fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_mul_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_mul_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1923,9 +1984,11 @@ pub unsafe fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_mul_ph(a, b); - simd_select_bitmask(k, r, _mm_setzero_ph()) +pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_mul_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
@@ -1935,8 +1998,8 @@ pub unsafe fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { - simd_mul(a, b) +pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_mul(a, b) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1947,9 +2010,11 @@ pub unsafe fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_mul_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_mul_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1960,9 +2025,11 @@ pub unsafe fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_mul_ph(a, b); - simd_select_bitmask(k, r, _mm256_setzero_ph()) +pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_mul_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
@@ -1972,8 +2039,8 @@ pub unsafe fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { - simd_mul(a, b) +pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_mul(a, b) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1984,9 +2051,11 @@ pub unsafe fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_mul_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_mul_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -1997,9 +2066,11 @@ pub unsafe fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmulph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_mul_ph(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) +pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_mul_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst. 
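Illustrative sketch, not part of the patch: the `_round_` variants take the rounding mode as a const generic, and `static_assert_rounding!` rejects invalid combinations at compile time (the `ROUNDING = 8` in the `assert_instr` attributes is `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`). The helper name below is hypothetical, same nightly-feature assumptions as above.

use core::arch::x86_64::{__m512h, _mm512_mul_round_ph, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT};

#[target_feature(enable = "avx512fp16")]
fn mul_round_nearest(a: __m512h, b: __m512h) -> __m512h {
    // Round to nearest with exceptions suppressed; the other accepted modes are
    // the TO_NEG_INF / TO_POS_INF / TO_ZERO variants (each ORed with NO_EXC)
    // and _MM_FROUND_CUR_DIRECTION.
    _mm512_mul_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
}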
@@ -2017,9 +2088,11 @@ pub unsafe fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mul_round_ph(a: __m512h, b: __m512h) -> __m512h { - static_assert_rounding!(ROUNDING); - vmulph(a, b, ROUNDING) +pub fn _mm512_mul_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vmulph(a, b, ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -2038,15 +2111,17 @@ pub unsafe fn _mm512_mul_round_ph(a: __m512h, b: __m512h) - #[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_mul_round_ph( +pub fn _mm512_mask_mul_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_mul_round_ph::(a, b); - simd_select_bitmask(k, r, src) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_mul_round_ph::(a, b); + simd_select_bitmask(k, r, src) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, and store the results in dst using @@ -2065,14 +2140,16 @@ pub unsafe fn _mm512_mask_mul_round_ph( #[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_mul_round_ph( +pub fn _mm512_maskz_mul_round_ph( k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_mul_round_ph::(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_mul_round_ph::(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -2091,7 +2168,7 @@ pub unsafe fn _mm512_maskz_mul_round_ph( #[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mul_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mul_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_mul_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -2113,14 +2190,16 @@ pub unsafe fn _mm_mul_round_sh(a: __m128h, b: __m128h) -> _ #[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_mul_round_sh( +pub fn _mm_mask_mul_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vmulsh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vmulsh(a, b, src, k, ROUNDING) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, store the result in the @@ -2140,11 +2219,7 @@ pub unsafe fn _mm_mask_mul_round_sh( #[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_mul_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_mul_round_sh(k: __mmask8, a: __m128h, b: 
__m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_mul_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -2157,7 +2232,7 @@ pub unsafe fn _mm_maskz_mul_round_sh( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmulsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) } @@ -2170,7 +2245,7 @@ pub unsafe fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmulsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -2183,7 +2258,7 @@ pub unsafe fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmulsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_mul_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) } @@ -2194,8 +2269,8 @@ pub unsafe fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { - simd_div(a, b) +pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_div(a, b) } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2206,9 +2281,11 @@ pub unsafe fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_div_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_div_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2219,9 +2296,11 @@ pub unsafe fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - let r = _mm_div_ph(a, b); - simd_select_bitmask(k, r, _mm_setzero_ph()) +pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { + let r = _mm_div_ph(a, b); + simd_select_bitmask(k, r, _mm_setzero_ph()) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. 
@@ -2231,8 +2310,8 @@ pub unsafe fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { - simd_div(a, b) +pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_div(a, b) } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2243,9 +2322,11 @@ pub unsafe fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_div_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_div_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2256,9 +2337,11 @@ pub unsafe fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - let r = _mm256_div_ph(a, b); - simd_select_bitmask(k, r, _mm256_setzero_ph()) +pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { + let r = _mm256_div_ph(a, b); + simd_select_bitmask(k, r, _mm256_setzero_ph()) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. 
@@ -2268,8 +2351,8 @@ pub unsafe fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { - simd_div(a, b) +pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_div(a, b) } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2280,9 +2363,11 @@ pub unsafe fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_div_ph(a, b); - simd_select_bitmask(k, r, src) +pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_div_ph(a, b); + simd_select_bitmask(k, r, src) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2293,9 +2378,11 @@ pub unsafe fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vdivph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - let r = _mm512_div_ph(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) +pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + let r = _mm512_div_ph(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst. 
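Illustrative sketch, not part of the patch: callers outside an `avx512fp16` context still need a single `unsafe` call at the dispatch boundary, typically guarded by runtime detection. The helpers below are hypothetical and assume `is_x86_feature_detected!("avx512fp16")` is supported by the toolchain in use, in addition to the nightly library feature noted earlier.

use core::arch::x86_64::{__m128h, _mm_div_ph};

#[target_feature(enable = "avx512fp16")]
fn div_ph(a: __m128h, b: __m128h) -> __m128h {
    // Safe: the enclosing function guarantees the target feature.
    _mm_div_ph(a, b)
}

fn div_ph_checked(a: __m128h, b: __m128h) -> Option<__m128h> {
    if std::arch::is_x86_feature_detected!("avx512fp16") {
        // SAFETY: the runtime check above proves `avx512fp16` is present, so
        // calling the `#[target_feature]` function is sound.
        Some(unsafe { div_ph(a, b) })
    } else {
        None
    }
}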
@@ -2313,9 +2400,11 @@ pub unsafe fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_div_round_ph(a: __m512h, b: __m512h) -> __m512h { - static_assert_rounding!(ROUNDING); - vdivph(a, b, ROUNDING) +pub fn _mm512_div_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vdivph(a, b, ROUNDING) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2334,15 +2423,17 @@ pub unsafe fn _mm512_div_round_ph(a: __m512h, b: __m512h) - #[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_div_round_ph( +pub fn _mm512_mask_div_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_div_round_ph::(a, b); - simd_select_bitmask(k, r, src) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_div_round_ph::(a, b); + simd_select_bitmask(k, r, src) + } } /// Divide packed half-precision (16-bit) floating-point elements in a by b, and store the results in dst using @@ -2361,14 +2452,16 @@ pub unsafe fn _mm512_mask_div_round_ph( #[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_div_round_ph( +pub fn _mm512_maskz_div_round_ph( k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r = _mm512_div_round_ph::(a, b); - simd_select_bitmask(k, r, _mm512_setzero_ph()) + unsafe { + static_assert_rounding!(ROUNDING); + let r = _mm512_div_round_ph::(a, b); + simd_select_bitmask(k, r, _mm512_setzero_ph()) + } } /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the @@ -2387,7 +2480,7 @@ pub unsafe fn _mm512_maskz_div_round_ph( #[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_div_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_div_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_div_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -2409,14 +2502,16 @@ pub unsafe fn _mm_div_round_sh(a: __m128h, b: __m128h) -> _ #[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_div_round_sh( +pub fn _mm_mask_div_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vdivsh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vdivsh(a, b, src, k, ROUNDING) + } } /// Divide the lower half-precision (16-bit) floating-point elements in a by b, store the result in the @@ -2436,11 +2531,7 @@ pub unsafe fn _mm_mask_div_round_sh( #[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_div_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_div_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> 
__m128h { static_assert_rounding!(ROUNDING); _mm_mask_div_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -2453,7 +2544,7 @@ pub unsafe fn _mm_maskz_div_round_sh( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vdivsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { _mm_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(a, b) } @@ -2466,7 +2557,7 @@ pub unsafe fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vdivsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -2479,7 +2570,7 @@ pub unsafe fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vdivsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_div_round_sh::<_MM_FROUND_CUR_DIRECTION>(k, a, b) } @@ -2492,7 +2583,7 @@ pub unsafe fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b) } @@ -2505,8 +2596,8 @@ pub unsafe fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) +pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) } } /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element @@ -2518,7 +2609,7 @@ pub unsafe fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b) } @@ -2531,7 +2622,7 @@ pub unsafe fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b) } @@ -2544,8 +2635,8 @@ pub unsafe fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h { 
#[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { - transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) +pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) } } /// Multiply packed complex numbers in a and b, and store the results in dst using zeromask k (the element @@ -2557,7 +2648,7 @@ pub unsafe fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b) } @@ -2570,7 +2661,7 @@ pub unsafe fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b) } @@ -2583,7 +2674,7 @@ pub unsafe fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -2596,7 +2687,7 @@ pub unsafe fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b) } @@ -2618,7 +2709,7 @@ pub unsafe fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m5 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mul_round_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mul_round_pch(a: __m512h, b: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask_mul_round_pch::(_mm512_undefined_ph(), 0xffff, a, b) } @@ -2641,20 +2732,22 @@ pub unsafe fn _mm512_mul_round_pch(a: __m512h, b: __m512h) #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_mul_round_pch( +pub fn _mm512_mask_mul_round_pch( src: __m512h, k: __mmask16, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - transmute(vfmulcph_512( - transmute(a), - transmute(b), - transmute(src), - k, - ROUNDING, - )) + unsafe { + 
static_assert_rounding!(ROUNDING); + transmute(vfmulcph_512( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } } /// Multiply the packed complex numbers in a and b, and store the results in dst using zeromask k (the element @@ -2675,7 +2768,7 @@ pub unsafe fn _mm512_mask_mul_round_pch( #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_mul_round_pch( +pub fn _mm512_maskz_mul_round_pch( k: __mmask16, a: __m512h, b: __m512h, @@ -2694,7 +2787,7 @@ pub unsafe fn _mm512_maskz_mul_round_pch( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_sch(_mm_undefined_ph(), 0xff, a, b) } @@ -2708,7 +2801,7 @@ pub unsafe fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -2722,7 +2815,7 @@ pub unsafe fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_sch(_mm_setzero_ph(), k, a, b) } @@ -2745,7 +2838,7 @@ pub unsafe fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mul_round_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mul_round_sch(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_mul_round_sch::(_mm_undefined_ph(), 0xff, a, b) } @@ -2769,20 +2862,22 @@ pub unsafe fn _mm_mul_round_sch(a: __m128h, b: __m128h) -> #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_mul_round_sch( +pub fn _mm_mask_mul_round_sch( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - transmute(vfmulcsh( - transmute(a), - transmute(b), - transmute(src), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmulcsh( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } } /// Multiply the lower complex numbers in a and b, and store the result in the lower elements of dst using @@ -2804,7 +2899,7 @@ pub unsafe fn _mm_mask_mul_round_sch( #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_mul_round_sch( +pub fn _mm_maskz_mul_round_sch( k: __mmask8, a: __m128h, b: __m128h, @@ -2822,7 +2917,7 @@ pub unsafe fn _mm_maskz_mul_round_sch( 
#[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { _mm_mul_pch(a, b) } @@ -2835,7 +2930,7 @@ pub unsafe fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_pch(src, k, a, b) } @@ -2848,7 +2943,7 @@ pub unsafe fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_mul_pch(k, a, b) } @@ -2861,7 +2956,7 @@ pub unsafe fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { _mm256_mul_pch(a, b) } @@ -2874,7 +2969,7 @@ pub unsafe fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { _mm256_mask_mul_pch(src, k, a, b) } @@ -2887,7 +2982,7 @@ pub unsafe fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { _mm256_maskz_mul_pch(k, a, b) } @@ -2899,7 +2994,7 @@ pub unsafe fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m2 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { _mm512_mul_pch(a, b) } @@ -2912,7 +3007,7 @@ pub unsafe fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_mul_pch(src, k, a, b) } @@ -2925,7 +3020,7 @@ pub unsafe fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __ 
#[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_maskz_mul_pch(k, a, b) } @@ -2945,7 +3040,7 @@ pub unsafe fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmul_round_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_fmul_round_pch(a: __m512h, b: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mul_round_pch::(a, b) } @@ -2967,7 +3062,7 @@ pub unsafe fn _mm512_fmul_round_pch(a: __m512h, b: __m512h) #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmul_round_pch( +pub fn _mm512_mask_fmul_round_pch( src: __m512h, k: __mmask16, a: __m512h, @@ -2994,7 +3089,7 @@ pub unsafe fn _mm512_mask_fmul_round_pch( #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmul_round_pch( +pub fn _mm512_maskz_fmul_round_pch( k: __mmask16, a: __m512h, b: __m512h, @@ -3012,7 +3107,7 @@ pub unsafe fn _mm512_maskz_fmul_round_pch( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { _mm_mul_sch(a, b) } @@ -3025,7 +3120,7 @@ pub unsafe fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_mul_sch(src, k, a, b) } @@ -3038,7 +3133,7 @@ pub unsafe fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_mul_sch(k, a, b) } @@ -3059,7 +3154,7 @@ pub unsafe fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmul_round_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_fmul_round_sch(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mul_round_sch::(a, b) } @@ -3082,7 +3177,7 @@ pub unsafe fn _mm_fmul_round_sch(a: __m128h, b: __m128h) -> #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmul_round_sch( +pub fn _mm_mask_fmul_round_sch( src: __m128h, k: 
__mmask8, a: __m128h, @@ -3110,7 +3205,7 @@ pub unsafe fn _mm_mask_fmul_round_sch( #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmul_round_sch( +pub fn _mm_maskz_fmul_round_sch( k: __mmask8, a: __m128h, b: __m128h, @@ -3129,7 +3224,7 @@ pub unsafe fn _mm_maskz_fmul_round_sch( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b) } @@ -3143,8 +3238,8 @@ pub unsafe fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) +pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and @@ -3157,7 +3252,7 @@ pub unsafe fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b) } @@ -3171,7 +3266,7 @@ pub unsafe fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b) } @@ -3185,8 +3280,8 @@ pub unsafe fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { - transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) +pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { + unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and @@ -3199,7 +3294,7 @@ pub unsafe fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b) } @@ 
-3213,7 +3308,7 @@ pub unsafe fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m2 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b) } @@ -3227,7 +3322,7 @@ pub unsafe fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -3241,7 +3336,7 @@ pub unsafe fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b) } @@ -3264,7 +3359,7 @@ pub unsafe fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cmul_round_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_cmul_round_pch(a: __m512h, b: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask_cmul_round_pch::(_mm512_undefined_ph(), 0xffff, a, b) } @@ -3288,20 +3383,22 @@ pub unsafe fn _mm512_cmul_round_pch(a: __m512h, b: __m512h) #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cmul_round_pch( +pub fn _mm512_mask_cmul_round_pch( src: __m512h, k: __mmask16, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - transmute(vfcmulcph_512( - transmute(a), - transmute(b), - transmute(src), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmulcph_512( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, and @@ -3323,7 +3420,7 @@ pub unsafe fn _mm512_mask_cmul_round_pch( #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cmul_round_pch( +pub fn _mm512_maskz_cmul_round_pch( k: __mmask16, a: __m512h, b: __m512h, @@ -3341,7 +3438,7 @@ pub unsafe fn _mm512_maskz_cmul_round_pch( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_sch(_mm_undefined_ph(), 0xff, a, b) } @@ -3355,7 +3452,7 @@ pub unsafe fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = 
"avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -3369,7 +3466,7 @@ pub unsafe fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_sch(_mm_setzero_ph(), k, a, b) } @@ -3391,7 +3488,7 @@ pub unsafe fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cmul_round_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_cmul_round_sch(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_cmul_round_sch::(_mm_undefined_ph(), 0xff, a, b) } @@ -3415,20 +3512,22 @@ pub unsafe fn _mm_cmul_round_sch(a: __m128h, b: __m128h) -> #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cmul_round_sch( +pub fn _mm_mask_cmul_round_sch( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - transmute(vfcmulcsh( - transmute(a), - transmute(b), - transmute(src), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmulcsh( + transmute(a), + transmute(b), + transmute(src), + k, + ROUNDING, + )) + } } /// Multiply the lower complex numbers in a by the complex conjugates of the lower complex numbers in b, @@ -3450,7 +3549,7 @@ pub unsafe fn _mm_mask_cmul_round_sch( #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cmul_round_sch( +pub fn _mm_maskz_cmul_round_sch( k: __mmask8, a: __m128h, b: __m128h, @@ -3469,7 +3568,7 @@ pub unsafe fn _mm_maskz_cmul_round_sch( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { _mm_cmul_pch(a, b) } @@ -3483,7 +3582,7 @@ pub unsafe fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_pch(src, k, a, b) } @@ -3497,7 +3596,7 @@ pub unsafe fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m12 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn 
_mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_cmul_pch(k, a, b) } @@ -3511,7 +3610,7 @@ pub unsafe fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { _mm256_cmul_pch(a, b) } @@ -3525,7 +3624,7 @@ pub unsafe fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h { _mm256_mask_cmul_pch(src, k, a, b) } @@ -3539,7 +3638,7 @@ pub unsafe fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h { _mm256_maskz_cmul_pch(k, a, b) } @@ -3553,7 +3652,7 @@ pub unsafe fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { _mm512_cmul_pch(a, b) } @@ -3567,7 +3666,7 @@ pub unsafe fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_cmul_pch(src, k, a, b) } @@ -3581,7 +3680,7 @@ pub unsafe fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h { _mm512_maskz_cmul_pch(k, a, b) } @@ -3603,7 +3702,7 @@ pub unsafe fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __ #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fcmul_round_pch(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_fcmul_round_pch(a: __m512h, b: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_cmul_round_pch::(a, b) } @@ -3627,7 +3726,7 @@ pub unsafe fn _mm512_fcmul_round_pch(a: __m512h, b: __m512h #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = 
"stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fcmul_round_pch( +pub fn _mm512_mask_fcmul_round_pch( src: __m512h, k: __mmask16, a: __m512h, @@ -3656,7 +3755,7 @@ pub unsafe fn _mm512_mask_fcmul_round_pch( #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fcmul_round_pch( +pub fn _mm512_maskz_fcmul_round_pch( k: __mmask16, a: __m512h, b: __m512h, @@ -3675,7 +3774,7 @@ pub unsafe fn _mm512_maskz_fcmul_round_pch( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { _mm_cmul_sch(a, b) } @@ -3689,7 +3788,7 @@ pub unsafe fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_cmul_sch(src, k, a, b) } @@ -3703,7 +3802,7 @@ pub unsafe fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m12 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmulcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_maskz_cmul_sch(k, a, b) } @@ -3725,7 +3824,7 @@ pub unsafe fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fcmul_round_sch(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_fcmul_round_sch(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_cmul_round_sch::(a, b) } @@ -3749,7 +3848,7 @@ pub unsafe fn _mm_fcmul_round_sch(a: __m128h, b: __m128h) - #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fcmul_round_sch( +pub fn _mm_mask_fcmul_round_sch( src: __m128h, k: __mmask8, a: __m128h, @@ -3778,7 +3877,7 @@ pub unsafe fn _mm_mask_fcmul_round_sch( #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fcmul_round_sch( +pub fn _mm_maskz_fcmul_round_sch( k: __mmask8, a: __m128h, b: __m128h, @@ -3794,8 +3893,8 @@ pub unsafe fn _mm_maskz_fcmul_round_sch( #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_abs_ph(v2: __m128h) -> __m128h { - transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) +pub fn _mm_abs_ph(v2: __m128h) -> __m128h { + unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) } } /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing @@ -3805,8 +3904,8 @@ pub unsafe fn _mm_abs_ph(v2: __m128h) -> __m128h { #[inline] 
#[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_abs_ph(v2: __m256h) -> __m256h { - transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) +pub fn _mm256_abs_ph(v2: __m256h) -> __m256h { + unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) } } /// Finds the absolute value of each packed half-precision (16-bit) floating-point element in v2, storing @@ -3816,8 +3915,8 @@ pub unsafe fn _mm256_abs_ph(v2: __m256h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_abs_ph(v2: __m512h) -> __m512h { - transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) +pub fn _mm512_abs_ph(v2: __m512h) -> __m512h { + unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst. Each complex @@ -3829,8 +3928,8 @@ pub unsafe fn _mm512_abs_ph(v2: __m512h) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_conj_pch(a: __m128h) -> __m128h { - transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) +pub fn _mm_conj_pch(a: __m128h) -> __m128h { + unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k @@ -3842,9 +3941,11 @@ pub unsafe fn _mm_conj_pch(a: __m128h) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { - let r: __m128 = transmute(_mm_conj_pch(a)); - transmute(simd_select_bitmask(k, r, transmute(src))) +pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { + let r: __m128 = transmute(_mm_conj_pch(a)); + transmute(simd_select_bitmask(k, r, transmute(src))) + } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k @@ -3856,7 +3957,7 @@ pub unsafe fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128 #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { +pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { _mm_mask_conj_pch(_mm_setzero_ph(), k, a) } @@ -3868,8 +3969,8 @@ pub unsafe fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_conj_pch(a: __m256h) -> __m256h { - transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) +pub fn _mm256_conj_pch(a: __m256h) -> __m256h { + unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k @@ -3881,9 +3982,11 @@ pub unsafe fn _mm256_conj_pch(a: __m256h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_conj_pch(src: __m256h, k: 
__mmask8, a: __m256h) -> __m256h { - let r: __m256 = transmute(_mm256_conj_pch(a)); - transmute(simd_select_bitmask(k, r, transmute(src))) +pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h { + unsafe { + let r: __m256 = transmute(_mm256_conj_pch(a)); + transmute(simd_select_bitmask(k, r, transmute(src))) + } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k @@ -3895,7 +3998,7 @@ pub unsafe fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { +pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { _mm256_mask_conj_pch(_mm256_setzero_ph(), k, a) } @@ -3907,8 +4010,8 @@ pub unsafe fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_conj_pch(a: __m512h) -> __m512h { - transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) +pub fn _mm512_conj_pch(a: __m512h) -> __m512h { + unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst using writemask k @@ -3920,9 +4023,11 @@ pub unsafe fn _mm512_conj_pch(a: __m512h) -> __m512h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h { - let r: __m512 = transmute(_mm512_conj_pch(a)); - transmute(simd_select_bitmask(k, r, transmute(src))) +pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h { + unsafe { + let r: __m512 = transmute(_mm512_conj_pch(a)); + transmute(simd_select_bitmask(k, r, transmute(src))) + } } /// Compute the complex conjugates of complex numbers in a, and store the results in dst using zeromask k @@ -3934,7 +4039,7 @@ pub unsafe fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __ #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { +pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a) } @@ -3947,7 +4052,7 @@ pub unsafe fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { _mm_mask3_fmadd_pch(a, b, c, 0xff) } @@ -3961,9 +4066,11 @@ pub unsafe fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does - transmute(simd_select_bitmask(k, r, transmute(a))) +pub 
fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -3976,13 +4083,15 @@ pub unsafe fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - transmute(vfmaddcph_mask3_128( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + transmute(vfmaddcph_mask3_128( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -3995,13 +4104,15 @@ pub unsafe fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - transmute(vfmaddcph_maskz_128( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + transmute(vfmaddcph_maskz_128( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -4013,7 +4124,7 @@ pub unsafe fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { +pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { _mm256_mask3_fmadd_pch(a, b, c, 0xff) } @@ -4027,9 +4138,11 @@ pub unsafe fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { - let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does - transmute(simd_select_bitmask(k, r, transmute(a))) +pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { + unsafe { + let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -4042,13 +4155,15 @@ pub unsafe fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = 
"127213")] -pub unsafe fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { - transmute(vfmaddcph_mask3_256( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { + unsafe { + transmute(vfmaddcph_mask3_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -4061,13 +4176,15 @@ pub unsafe fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mm #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { - transmute(vfmaddcph_maskz_256( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { + transmute(vfmaddcph_maskz_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -4079,7 +4196,7 @@ pub unsafe fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) } @@ -4093,7 +4210,7 @@ pub unsafe fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) } @@ -4107,7 +4224,7 @@ pub unsafe fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { +pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) } @@ -4121,7 +4238,7 @@ pub unsafe fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mm #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) } @@ -4143,11 +4260,7 @@ pub unsafe fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __ #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub 
unsafe fn _mm512_fmadd_round_pch( - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { +pub fn _mm512_fmadd_round_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask3_fmadd_round_pch::(a, b, c, 0xffff) } @@ -4171,15 +4284,17 @@ pub unsafe fn _mm512_fmadd_round_pch( #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmadd_round_pch( +pub fn _mm512_mask_fmadd_round_pch( a: __m512h, k: __mmask16, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does - transmute(simd_select_bitmask(k, r, transmute(a))) + unsafe { + static_assert_rounding!(ROUNDING); + let r: __m512 = transmute(_mm512_mask3_fmadd_round_pch::(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -4201,20 +4316,22 @@ pub unsafe fn _mm512_mask_fmadd_round_pch( #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmadd_round_pch( +pub fn _mm512_mask3_fmadd_round_pch( a: __m512h, b: __m512h, c: __m512h, k: __mmask16, ) -> __m512h { - static_assert_rounding!(ROUNDING); - transmute(vfmaddcph_mask3_512( - transmute(a), - transmute(b), - transmute(c), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcph_mask3_512( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } } /// Multiply packed complex numbers in a and b, accumulate to the corresponding complex numbers in c, @@ -4236,20 +4353,22 @@ pub unsafe fn _mm512_mask3_fmadd_round_pch( #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmadd_round_pch( +pub fn _mm512_maskz_fmadd_round_pch( k: __mmask16, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - transmute(vfmaddcph_maskz_512( - transmute(a), - transmute(b), - transmute(c), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcph_maskz_512( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } } /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and @@ -4262,7 +4381,7 @@ pub unsafe fn _mm512_maskz_fmadd_round_pch( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) } @@ -4277,7 +4396,7 @@ pub unsafe fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_mask_fmadd_sch(a: 
__m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) } @@ -4292,7 +4411,7 @@ pub unsafe fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { +pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) } @@ -4307,7 +4426,7 @@ pub unsafe fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) } @@ -4329,19 +4448,17 @@ pub unsafe fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128 #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmadd_round_sch( - a: __m128h, - b: __m128h, - c: __m128h, -) -> __m128h { - static_assert_rounding!(ROUNDING); - transmute(vfmaddcsh_mask( - transmute(a), - transmute(b), - transmute(c), - 0xff, - ROUNDING, - )) +pub fn _mm_fmadd_round_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfmaddcsh_mask( + transmute(a), + transmute(b), + transmute(c), + 0xff, + ROUNDING, + )) + } } /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and @@ -4364,16 +4481,18 @@ pub unsafe fn _mm_fmadd_round_sch( #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmadd_round_sch( +pub fn _mm_mask_fmadd_round_sch( a: __m128h, k: __mmask8, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let a = transmute(a); - let r = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what CLang does - transmute(_mm_mask_move_ss(a, k, a, r)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = transmute(a); + let r = vfmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); // using `0xff` would have been fine here, but this is what CLang does + transmute(_mm_mask_move_ss(a, k, a, r)) + } } /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and @@ -4396,16 +4515,18 @@ pub unsafe fn _mm_mask_fmadd_round_sch( #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmadd_round_sch( +pub fn _mm_mask3_fmadd_round_sch( a: __m128h, b: __m128h, c: __m128h, k: __mmask8, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let c = transmute(c); - let r = vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING); - transmute(_mm_move_ss(c, r)) + unsafe { + static_assert_rounding!(ROUNDING); + let c = transmute(c); + let r = 
vfmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING); + transmute(_mm_move_ss(c, r)) + } } /// Multiply the lower complex numbers in a and b, accumulate to the lower complex number in c, and @@ -4428,16 +4549,18 @@ pub unsafe fn _mm_mask3_fmadd_round_sch( #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmadd_round_sch( +pub fn _mm_maskz_fmadd_round_sch( k: __mmask8, a: __m128h, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let a = transmute(a); - let r = vfmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING); - transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimized `vfmaddcsh_maskz` to output an all-zero vector, which is incorrect + unsafe { + static_assert_rounding!(ROUNDING); + let a = transmute(a); + let r = vfmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING); + transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimized `vfmaddcsh_maskz` to output an all-zero vector, which is incorrect + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4450,7 +4573,7 @@ pub unsafe fn _mm_maskz_fmadd_round_sch( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { _mm_mask3_fcmadd_pch(a, b, c, 0xff) } @@ -4465,9 +4588,11 @@ pub unsafe fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does - transmute(simd_select_bitmask(k, r, transmute(a))) +pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4481,13 +4606,15 @@ pub unsafe fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - transmute(vfcmaddcph_mask3_128( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + transmute(vfcmaddcph_mask3_128( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4501,13 +4628,15 @@ pub unsafe fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmas #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, 
assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - transmute(vfcmaddcph_maskz_128( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + transmute(vfcmaddcph_maskz_128( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4520,7 +4649,7 @@ pub unsafe fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m12 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { +pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { _mm256_mask3_fcmadd_pch(a, b, c, 0xff) } @@ -4535,9 +4664,11 @@ pub unsafe fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { - let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does - transmute(simd_select_bitmask(k, r, transmute(a))) +pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h { + unsafe { + let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4551,13 +4682,15 @@ pub unsafe fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { - transmute(vfcmaddcph_mask3_256( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h { + unsafe { + transmute(vfcmaddcph_mask3_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4571,13 +4704,15 @@ pub unsafe fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { - transmute(vfcmaddcph_maskz_256( - transmute(a), - transmute(b), - transmute(c), - k, - )) +pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { + transmute(vfcmaddcph_maskz_256( + transmute(a), + transmute(b), + transmute(c), + k, + )) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, 
accumulate @@ -4590,7 +4725,7 @@ pub unsafe fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) } @@ -4605,7 +4740,7 @@ pub unsafe fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h { _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) } @@ -4620,7 +4755,7 @@ pub unsafe fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { +pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h { _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) } @@ -4635,7 +4770,7 @@ pub unsafe fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h { _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) } @@ -4658,11 +4793,7 @@ pub unsafe fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: _ #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fcmadd_round_pch( - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { +pub fn _mm512_fcmadd_round_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask3_fcmadd_round_pch::(a, b, c, 0xffff) } @@ -4687,15 +4818,17 @@ pub unsafe fn _mm512_fcmadd_round_pch( #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fcmadd_round_pch( +pub fn _mm512_mask_fcmadd_round_pch( a: __m512h, k: __mmask16, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does - transmute(simd_select_bitmask(k, r, transmute(a))) + unsafe { + static_assert_rounding!(ROUNDING); + let r: __m512 = transmute(_mm512_mask3_fcmadd_round_pch::(a, b, c, k)); // using `0xffff` would have been fine here, but this is what CLang does + transmute(simd_select_bitmask(k, r, transmute(a))) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ 
-4718,20 +4851,22 @@ pub unsafe fn _mm512_mask_fcmadd_round_pch( #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fcmadd_round_pch( +pub fn _mm512_mask3_fcmadd_round_pch( a: __m512h, b: __m512h, c: __m512h, k: __mmask16, ) -> __m512h { - static_assert_rounding!(ROUNDING); - transmute(vfcmaddcph_mask3_512( - transmute(a), - transmute(b), - transmute(c), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmaddcph_mask3_512( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } } /// Multiply packed complex numbers in a by the complex conjugates of packed complex numbers in b, accumulate @@ -4754,20 +4889,22 @@ pub unsafe fn _mm512_mask3_fcmadd_round_pch( #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fcmadd_round_pch( +pub fn _mm512_maskz_fcmadd_round_pch( k: __mmask16, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - transmute(vfcmaddcph_maskz_512( - transmute(a), - transmute(b), - transmute(c), - k, - ROUNDING, - )) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmaddcph_maskz_512( + transmute(a), + transmute(b), + transmute(c), + k, + ROUNDING, + )) + } } /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, @@ -4781,7 +4918,7 @@ pub unsafe fn _mm512_maskz_fcmadd_round_pch( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c) } @@ -4797,7 +4934,7 @@ pub unsafe fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c) } @@ -4813,7 +4950,7 @@ pub unsafe fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { +pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k) } @@ -4829,7 +4966,7 @@ pub unsafe fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmas #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfcmaddcsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { +pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c) } @@ -4853,19 +4990,17 @@ pub unsafe 
fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m12 #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fcmadd_round_sch( - a: __m128h, - b: __m128h, - c: __m128h, -) -> __m128h { - static_assert_rounding!(ROUNDING); - transmute(vfcmaddcsh_mask( - transmute(a), - transmute(b), - transmute(c), - 0xff, - ROUNDING, - )) +pub fn _mm_fcmadd_round_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vfcmaddcsh_mask( + transmute(a), + transmute(b), + transmute(c), + 0xff, + ROUNDING, + )) + } } /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, @@ -4889,16 +5024,18 @@ pub unsafe fn _mm_fcmadd_round_sch( #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fcmadd_round_sch( +pub fn _mm_mask_fcmadd_round_sch( a: __m128h, k: __mmask8, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let a = transmute(a); - let r = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); - transmute(_mm_mask_move_ss(a, k, a, r)) + unsafe { + static_assert_rounding!(ROUNDING); + let a = transmute(a); + let r = vfcmaddcsh_mask(a, transmute(b), transmute(c), k, ROUNDING); + transmute(_mm_mask_move_ss(a, k, a, r)) + } } /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, @@ -4922,16 +5059,18 @@ pub unsafe fn _mm_mask_fcmadd_round_sch( #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fcmadd_round_sch( +pub fn _mm_mask3_fcmadd_round_sch( a: __m128h, b: __m128h, c: __m128h, k: __mmask8, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let c = transmute(c); - let r = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING); - transmute(_mm_move_ss(c, r)) + unsafe { + static_assert_rounding!(ROUNDING); + let c = transmute(c); + let r = vfcmaddcsh_mask(transmute(a), transmute(b), c, k, ROUNDING); + transmute(_mm_move_ss(c, r)) + } } /// Multiply the lower complex number in a by the complex conjugate of the lower complex number in b, @@ -4955,16 +5094,18 @@ pub unsafe fn _mm_mask3_fcmadd_round_sch( #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fcmadd_round_sch( +pub fn _mm_maskz_fcmadd_round_sch( k: __mmask8, a: __m128h, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let a = transmute(a); - let r = vfcmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING); - transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimized `vfcmaddcsh_maskz` to output an all-zero vector, which is incorrect + unsafe { + static_assert_rounding!(ROUNDING); + let a = transmute(a); + let r = vfcmaddcsh_maskz(a, transmute(b), transmute(c), k, ROUNDING); + transmute(_mm_move_ss(a, r)) // FIXME: If `k == 0`, then LLVM optimized `vfcmaddcsh_maskz` to output an all-zero vector, which is incorrect + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -4975,8 +5116,8 @@ pub unsafe fn _mm_maskz_fcmadd_round_sch( 
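// Illustrative sketch, not part of the diff: the `_round` variants above take the rounding
// mode as a const generic (`ROUNDING`), so a call site selects it with a turbofish. The
// helper `fcmadd_rn_demo` is hypothetical; it assumes `use core::arch::x86_64::*;` and a
// nightly toolchain with `#![feature(stdarch_x86_avx512_f16)]`.
#[target_feature(enable = "avx512fp16")]
fn fcmadd_rn_demo(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
    // Round-to-nearest with exceptions suppressed (the mode the tests assert as `ROUNDING = 8`).
    _mm_fcmadd_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}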
#[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_fma(a, b, c) +pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(a, b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -4988,8 +5129,8 @@ pub unsafe fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) +pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5001,8 +5142,8 @@ pub unsafe fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) +pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5014,8 +5155,8 @@ pub unsafe fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) +pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5026,8 +5167,8 @@ pub unsafe fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_fma(a, b, c) +pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(a, b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5039,8 +5180,8 @@ pub unsafe fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) +pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { 
simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5052,8 +5193,8 @@ pub unsafe fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { - simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) +pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5065,8 +5206,8 @@ pub unsafe fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mma #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) +pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5077,8 +5218,8 @@ pub unsafe fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_fma(a, b, c) +pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(a, b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5090,8 +5231,8 @@ pub unsafe fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) +pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5103,8 +5244,8 @@ pub unsafe fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { - simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) +pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5116,8 +5257,8 @@ pub unsafe fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mma #[target_feature(enable = 
"avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) +pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5137,13 +5278,11 @@ pub unsafe fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmadd_round_ph( - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { - static_assert_rounding!(ROUNDING); - vfmaddph_512(a, b, c, ROUNDING) +pub fn _mm512_fmadd_round_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddph_512(a, b, c, ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5164,14 +5303,16 @@ pub unsafe fn _mm512_fmadd_round_ph( #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmadd_round_ph( +pub fn _mm512_mask_fmadd_round_ph( a: __m512h, k: __mmask32, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmadd_round_ph::(a, b, c), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmadd_round_ph::(a, b, c), a) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5192,14 +5333,16 @@ pub unsafe fn _mm512_mask_fmadd_round_ph( #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmadd_round_ph( +pub fn _mm512_mask3_fmadd_round_ph( a: __m512h, b: __m512h, c: __m512h, k: __mmask32, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmadd_round_ph::(a, b, c), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmadd_round_ph::(a, b, c), c) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, add the intermediate @@ -5220,18 +5363,20 @@ pub unsafe fn _mm512_mask3_fmadd_round_ph( #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmadd_round_ph( +pub fn _mm512_maskz_fmadd_round_ph( k: __mmask32, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask( - k, - _mm512_fmadd_round_ph::(a, b, c), - _mm512_setzero_ph(), - ) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fmadd_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5243,12 +5388,14 @@ pub unsafe fn _mm512_maskz_fmadd_round_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = 
"stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = fmaf16(extracta, extractb, extractc); - simd_insert!(a, 0, r) +pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(extracta, extractb, extractc); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5261,14 +5408,16 @@ pub unsafe fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - let mut fmadd: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmadd = fmaf16(fmadd, extractb, extractc); +pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmadd: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmadd = fmaf16(fmadd, extractb, extractc); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5281,14 +5430,16 @@ pub unsafe fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - let mut fmadd: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fmadd = fmaf16(extracta, extractb, fmadd); +pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fmadd: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fmadd = fmaf16(extracta, extractb, fmadd); + } + simd_insert!(c, 0, fmadd) } - simd_insert!(c, 0, fmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5301,15 +5452,17 @@ pub unsafe fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let mut fmadd: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmadd = fmaf16(extracta, extractb, extractc); +pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmadd: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = 
simd_extract!(c, 0); + fmadd = fmaf16(extracta, extractb, extractc); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5330,17 +5483,15 @@ pub unsafe fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmadd_round_sh<const ROUNDING: i32>( - a: __m128h, - b: __m128h, - c: __m128h, -) -> __m128h { - static_assert_rounding!(ROUNDING); - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = vfmaddsh(extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, r) +pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = vfmaddsh(extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5362,20 +5513,22 @@ pub unsafe fn _mm_fmadd_round_sh( #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>( +pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>( a: __m128h, k: __mmask8, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fmadd: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmadd = vfmaddsh(fmadd, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5397,20 +5550,22 @@ pub unsafe fn _mm_mask_fmadd_round_sh( #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>( +pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>( a: __m128h, b: __m128h, c: __m128h, k: __mmask8, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fmadd: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fmadd = vfmaddsh(extracta, extractb, fmadd, ROUNDING); + } + simd_insert!(c, 0, fmadd) } - simd_insert!(c, 0, fmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and add the intermediate @@ -5432,21 +5587,23 @@ pub unsafe fn _mm_mask3_fmadd_round_sh( #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] 
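For reference, the masked variants above follow three conventions: `_mask_` merges into `a`, `_mask3_` merges into `c`, and `_maskz_` zeroes the element when the corresponding mask bit is clear. Below is a minimal plain-Rust sketch of the lane-0 behaviour, using `f32` as a stand-in for the still-unstable `f16` type; it is an illustration of the semantics only, not the vector implementation.

```rust
// Scalar reference model of the three masking conventions (lane 0 only).
// `mul_add` is the fused multiply-add, playing the role of fmaf16 above.
fn mask_fmadd_sh(a: f32, k: u8, b: f32, c: f32) -> f32 {
    // merge-masking: keep `a` when bit 0 of the mask is clear
    if k & 1 != 0 { a.mul_add(b, c) } else { a }
}

fn mask3_fmadd_sh(a: f32, b: f32, c: f32, k: u8) -> f32 {
    // merge-masking into `c`
    if k & 1 != 0 { a.mul_add(b, c) } else { c }
}

fn maskz_fmadd_sh(k: u8, a: f32, b: f32, c: f32) -> f32 {
    // zero-masking: produce 0.0 when bit 0 of the mask is clear
    if k & 1 != 0 { a.mul_add(b, c) } else { 0.0 }
}

fn main() {
    assert_eq!(mask_fmadd_sh(2.0, 0, 3.0, 4.0), 2.0);   // k = 0 keeps a
    assert_eq!(mask3_fmadd_sh(2.0, 3.0, 4.0, 0), 4.0);  // k = 0 keeps c
    assert_eq!(maskz_fmadd_sh(0, 2.0, 3.0, 4.0), 0.0);  // k = 0 zeroes
    assert_eq!(maskz_fmadd_sh(1, 2.0, 3.0, 4.0), 10.0); // 2 * 3 + 4
}
```

The packed `_mask_`/`_mask3_`/`_maskz_` forms apply the same per-lane selection through `simd_select_bitmask`, taking unselected lanes from `a`, from `c`, or as zero, respectively.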
-pub unsafe fn _mm_maskz_fmadd_round_sh( +pub fn _mm_maskz_fmadd_round_sh( k: __mmask8, a: __m128h, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fmadd: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmadd: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmadd = vfmaddsh(extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fmadd) } - simd_insert!(a, 0, fmadd) } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5458,8 +5615,8 @@ pub unsafe fn _mm_maskz_fmadd_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_fma(a, b, simd_neg(c)) +pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5471,8 +5628,8 @@ pub unsafe fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) +pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5484,8 +5641,8 @@ pub unsafe fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) +pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5497,8 +5654,8 @@ pub unsafe fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) +pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5509,8 +5666,8 @@ pub unsafe fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h #[target_feature(enable = 
"avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_fma(a, b, simd_neg(c)) +pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5522,8 +5679,8 @@ pub unsafe fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) +pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5535,8 +5692,8 @@ pub unsafe fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { - simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) +pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5548,8 +5705,8 @@ pub unsafe fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mma #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) +pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5560,8 +5717,8 @@ pub unsafe fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_fma(a, b, simd_neg(c)) +pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5573,8 +5730,8 @@ pub unsafe fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) +pub fn _mm512_mask_fmsub_ph(a: __m512h, k: 
__mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5586,8 +5743,8 @@ pub unsafe fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { - simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) +pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5599,8 +5756,8 @@ pub unsafe fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mma #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) +pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5620,13 +5777,11 @@ pub unsafe fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmsub_round_ph( - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { - static_assert_rounding!(ROUNDING); - vfmaddph_512(a, b, simd_neg(c), ROUNDING) +pub fn _mm512_fmsub_round_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddph_512(a, b, simd_neg(c), ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5647,14 +5802,16 @@ pub unsafe fn _mm512_fmsub_round_ph( #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmsub_round_ph( +pub fn _mm512_mask_fmsub_round_ph( a: __m512h, k: __mmask32, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmsub_round_ph::(a, b, c), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmsub_round_ph::(a, b, c), a) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5675,14 +5832,16 @@ pub unsafe fn _mm512_mask_fmsub_round_ph( #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmsub_round_ph( +pub fn _mm512_mask3_fmsub_round_ph( a: __m512h, b: __m512h, c: __m512h, k: __mmask32, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmsub_round_ph::(a, b, c), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, 
_mm512_fmsub_round_ph::(a, b, c), c) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -5703,18 +5862,20 @@ pub unsafe fn _mm512_mask3_fmsub_round_ph( #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmsub_round_ph( +pub fn _mm512_maskz_fmsub_round_ph( k: __mmask32, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask( - k, - _mm512_fmsub_round_ph::(a, b, c), - _mm512_setzero_ph(), - ) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fmsub_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5726,12 +5887,14 @@ pub unsafe fn _mm512_maskz_fmsub_round_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = fmaf16(extracta, extractb, -extractc); - simd_insert!(a, 0, r) +pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(extracta, extractb, -extractc); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5744,14 +5907,16 @@ pub unsafe fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - let mut fmsub: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmsub = fmaf16(fmsub, extractb, -extractc); +pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmsub: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = fmaf16(fmsub, extractb, -extractc); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5764,14 +5929,16 @@ pub unsafe fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - let mut fmsub: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fmsub = fmaf16(extracta, extractb, -fmsub); +pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fmsub: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let 
extractb: f16 = simd_extract!(b, 0); + fmsub = fmaf16(extracta, extractb, -fmsub); + } + simd_insert!(c, 0, fmsub) } - simd_insert!(c, 0, fmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5784,15 +5951,17 @@ pub unsafe fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let mut fmsub: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmsub = fmaf16(extracta, extractb, -extractc); +pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fmsub: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = fmaf16(extracta, extractb, -extractc); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5813,17 +5982,15 @@ pub unsafe fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmsub_round_sh( - a: __m128h, - b: __m128h, - c: __m128h, -) -> __m128h { - static_assert_rounding!(ROUNDING); - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = vfmaddsh(extracta, extractb, -extractc, ROUNDING); - simd_insert!(a, 0, r) +pub fn _mm_fmsub_round_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = vfmaddsh(extracta, extractb, -extractc, ROUNDING); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5845,20 +6012,22 @@ pub unsafe fn _mm_fmsub_round_sh( #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmsub_round_sh( +pub fn _mm_mask_fmsub_round_sh( a: __m128h, k: __mmask8, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fmsub: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = vfmaddsh(fmsub, extractb, -extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5880,20 +6049,22 @@ pub unsafe fn _mm_mask_fmsub_round_sh( #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] 
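The `ROUNDING` parameter of the `_round_` variants is a const generic and must be one of the combinations accepted by `static_assert_rounding!`: a rounding mode OR'ed with `_MM_FROUND_NO_EXC`, or `_MM_FROUND_CUR_DIRECTION`. A nightly-only sketch of pinning the rounding mode from caller code follows; the wrapper name is made up for illustration, and older toolchains may additionally need `#![feature(avx512_target_feature)]`.

```rust
#![feature(stdarch_x86_avx512_f16)]

use core::arch::x86_64::*;

// Hypothetical wrapper: round-to-nearest-even with exceptions suppressed,
// i.e. the ROUNDING = 8 combination used by the assert_instr tests above.
// Only call this on CPUs with AVX512-FP16 (e.g. after runtime detection).
#[target_feature(enable = "avx512fp16")]
fn fmsub_round_to_nearest(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
    _mm512_fmsub_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}
```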
#[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmsub_round_sh( +pub fn _mm_mask3_fmsub_round_sh( a: __m128h, b: __m128h, c: __m128h, k: __mmask8, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fmsub: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fmsub = vfmaddsh(extracta, extractb, -fmsub, ROUNDING); + } + simd_insert!(c, 0, fmsub) } - simd_insert!(c, 0, fmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract packed elements @@ -5907,21 +6078,23 @@ pub unsafe fn _mm_mask3_fmsub_round_sh( #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmsub_round_sh( +pub fn _mm_maskz_fmsub_round_sh( k: __mmask8, a: __m128h, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fmsub: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fmsub: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fmsub = vfmaddsh(extracta, extractb, -extractc, ROUNDING); + } + simd_insert!(a, 0, fmsub) } - simd_insert!(a, 0, fmsub) } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -5932,8 +6105,8 @@ pub unsafe fn _mm_maskz_fmsub_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_fma(simd_neg(a), b, c) +pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -5945,8 +6118,8 @@ pub unsafe fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) +pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -5958,8 +6131,8 @@ pub unsafe fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> 
__m128h { - simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) +pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -5971,8 +6144,8 @@ pub unsafe fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) +pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -5983,8 +6156,8 @@ pub unsafe fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_fma(simd_neg(a), b, c) +pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -5996,8 +6169,8 @@ pub unsafe fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) +pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6009,8 +6182,8 @@ pub unsafe fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { - simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) +pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6022,8 +6195,8 @@ pub unsafe fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mm #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) +pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, 
_mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6034,8 +6207,8 @@ pub unsafe fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_fma(simd_neg(a), b, c) +pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6047,8 +6220,8 @@ pub unsafe fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) +pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6060,8 +6233,8 @@ pub unsafe fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { - simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) +pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6073,8 +6246,8 @@ pub unsafe fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mm #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) +pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6094,13 +6267,11 @@ pub unsafe fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __ #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fnmadd_round_ph( - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { - static_assert_rounding!(ROUNDING); - vfmaddph_512(simd_neg(a), b, c, ROUNDING) +pub fn _mm512_fnmadd_round_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddph_512(simd_neg(a), b, c, ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6121,14 +6292,16 @@ pub 
unsafe fn _mm512_fnmadd_round_ph( #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fnmadd_round_ph( +pub fn _mm512_mask_fnmadd_round_ph( a: __m512h, k: __mmask32, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fnmadd_round_ph::(a, b, c), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fnmadd_round_ph::(a, b, c), a) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6149,14 +6322,16 @@ pub unsafe fn _mm512_mask_fnmadd_round_ph( #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fnmadd_round_ph( +pub fn _mm512_mask3_fnmadd_round_ph( a: __m512h, b: __m512h, c: __m512h, k: __mmask32, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fnmadd_round_ph::(a, b, c), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fnmadd_round_ph::(a, b, c), c) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract the intermediate @@ -6177,18 +6352,20 @@ pub unsafe fn _mm512_mask3_fnmadd_round_ph( #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fnmadd_round_ph( +pub fn _mm512_maskz_fnmadd_round_ph( k: __mmask32, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask( - k, - _mm512_fnmadd_round_ph::(a, b, c), - _mm512_setzero_ph(), - ) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fnmadd_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6200,12 +6377,14 @@ pub unsafe fn _mm512_maskz_fnmadd_round_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = fmaf16(-extracta, extractb, extractc); - simd_insert!(a, 0, r) +pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(-extracta, extractb, extractc); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6218,14 +6397,16 @@ pub unsafe fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - let mut fnmadd: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmadd = fmaf16(-fnmadd, extractb, extractc); 
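As a reference for the sign conventions across these families: `fmadd` computes a*b + c, `fmsub` computes a*b - c, `fnmadd` computes -(a*b) + c, and `fnmsub` computes -(a*b) - c, matching the `simd_neg` placements in the bodies above. A minimal scalar sketch in plain Rust, with `f64` as a stand-in, for illustration only:

```rust
// Per-element sign conventions of the four FMA families.
fn fmadd(a: f64, b: f64, c: f64) -> f64 { a.mul_add(b, c) }      //  a*b + c
fn fmsub(a: f64, b: f64, c: f64) -> f64 { a.mul_add(b, -c) }     //  a*b - c
fn fnmadd(a: f64, b: f64, c: f64) -> f64 { (-a).mul_add(b, c) }  // -(a*b) + c
fn fnmsub(a: f64, b: f64, c: f64) -> f64 { (-a).mul_add(b, -c) } // -(a*b) - c

fn main() {
    let (a, b, c) = (2.0_f64, 3.0, 4.0);
    assert_eq!(fmadd(a, b, c), 10.0);
    assert_eq!(fmsub(a, b, c), 2.0);
    assert_eq!(fnmadd(a, b, c), -2.0);
    assert_eq!(fnmsub(a, b, c), -10.0);
}
```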
+pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fnmadd: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmadd = fmaf16(-fnmadd, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6238,14 +6419,16 @@ pub unsafe fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - let mut fnmadd: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fnmadd = fmaf16(-extracta, extractb, fnmadd); +pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fnmadd: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fnmadd = fmaf16(-extracta, extractb, fnmadd); + } + simd_insert!(c, 0, fnmadd) } - simd_insert!(c, 0, fnmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6258,15 +6441,17 @@ pub unsafe fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let mut fnmadd: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmadd = fmaf16(-extracta, extractb, extractc); +pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fnmadd: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmadd = fmaf16(-extracta, extractb, extractc); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6287,17 +6472,15 @@ pub unsafe fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fnmadd_round_sh( - a: __m128h, - b: __m128h, - c: __m128h, -) -> __m128h { - static_assert_rounding!(ROUNDING); - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = vfmaddsh(-extracta, extractb, extractc, ROUNDING); - simd_insert!(a, 0, r) +pub fn _mm_fnmadd_round_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = vfmaddsh(-extracta, extractb, extractc, ROUNDING); + simd_insert!(a, 0, r) + } } /// Multiply the lower 
half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6319,20 +6502,22 @@ pub unsafe fn _mm_fnmadd_round_sh( #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fnmadd_round_sh( +pub fn _mm_mask_fnmadd_round_sh( a: __m128h, k: __mmask8, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmadd = vfmaddsh(-fnmadd, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6354,20 +6539,22 @@ pub unsafe fn _mm_mask_fnmadd_round_sh( #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fnmadd_round_sh( +pub fn _mm_mask3_fnmadd_round_sh( a: __m128h, b: __m128h, c: __m128h, k: __mmask8, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fnmadd = vfmaddsh(-extracta, extractb, fnmadd, ROUNDING); + } + simd_insert!(c, 0, fnmadd) } - simd_insert!(c, 0, fnmadd) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6389,21 +6576,23 @@ pub unsafe fn _mm_mask3_fnmadd_round_sh( #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fnmadd_round_sh( +pub fn _mm_maskz_fnmadd_round_sh( k: __mmask8, a: __m128h, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fnmadd: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmadd: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmadd = vfmaddsh(-extracta, extractb, extractc, ROUNDING); + } + simd_insert!(a, 0, fnmadd) } - simd_insert!(a, 0, fnmadd) } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6414,8 +6603,8 @@ pub unsafe fn _mm_maskz_fnmadd_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> 
__m128h { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6427,8 +6616,8 @@ pub unsafe fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) +pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6440,8 +6629,8 @@ pub unsafe fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) +pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6453,8 +6642,8 @@ pub unsafe fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) +pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6465,8 +6654,8 @@ pub unsafe fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6478,8 +6667,8 @@ pub unsafe fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) +pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) 
floating-point elements in a and b, subtract packed elements @@ -6491,8 +6680,8 @@ pub unsafe fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { - simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) +pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6504,8 +6693,8 @@ pub unsafe fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mm #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) +pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6516,8 +6705,8 @@ pub unsafe fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6529,8 +6718,8 @@ pub unsafe fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) +pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6542,8 +6731,8 @@ pub unsafe fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { - simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) +pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6555,8 +6744,8 @@ pub unsafe fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mm #[target_feature(enable = 
"avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) +pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6576,13 +6765,11 @@ pub unsafe fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __ #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fnmsub_round_ph( - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { - static_assert_rounding!(ROUNDING); - vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING) +pub fn _mm512_fnmsub_round_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddph_512(simd_neg(a), b, simd_neg(c), ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6603,14 +6790,16 @@ pub unsafe fn _mm512_fnmsub_round_ph( #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fnmsub_round_ph( +pub fn _mm512_mask_fnmsub_round_ph( a: __m512h, k: __mmask32, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fnmsub_round_ph::(a, b, c), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fnmsub_round_ph::(a, b, c), a) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6631,14 +6820,16 @@ pub unsafe fn _mm512_mask_fnmsub_round_ph( #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fnmsub_round_ph( +pub fn _mm512_mask3_fnmsub_round_ph( a: __m512h, b: __m512h, c: __m512h, k: __mmask32, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fnmsub_round_ph::(a, b, c), c) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fnmsub_round_ph::(a, b, c), c) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, subtract packed elements @@ -6659,18 +6850,20 @@ pub unsafe fn _mm512_mask3_fnmsub_round_ph( #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fnmsub_round_ph( +pub fn _mm512_maskz_fnmsub_round_ph( k: __mmask32, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask( - k, - _mm512_fnmsub_round_ph::(a, b, c), - _mm512_setzero_ph(), - ) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fnmsub_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6682,12 +6875,14 @@ pub unsafe fn _mm512_maskz_fnmsub_round_ph( #[target_feature(enable = 
"avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = fmaf16(-extracta, extractb, -extractc); - simd_insert!(a, 0, r) +pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = fmaf16(-extracta, extractb, -extractc); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6700,14 +6895,16 @@ pub unsafe fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - let mut fnmsub: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmsub = fmaf16(-fnmsub, extractb, -extractc); +pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fnmsub: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmsub = fmaf16(-fnmsub, extractb, -extractc); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6720,14 +6917,16 @@ pub unsafe fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - let mut fnmsub: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fnmsub = fmaf16(-extracta, extractb, -fnmsub); +pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { + let mut fnmsub: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fnmsub = fmaf16(-extracta, extractb, -fnmsub); + } + simd_insert!(c, 0, fnmsub) } - simd_insert!(c, 0, fnmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6740,15 +6939,17 @@ pub unsafe fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfnmsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - let mut fnmsub: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmsub = fmaf16(-extracta, extractb, -extractc); +pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + let mut fnmsub: f16 = 
0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmsub = fmaf16(-extracta, extractb, -extractc); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6769,17 +6970,15 @@ pub unsafe fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fnmsub_round_sh( - a: __m128h, - b: __m128h, - c: __m128h, -) -> __m128h { - static_assert_rounding!(ROUNDING); - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - let r = vfmaddsh(-extracta, extractb, -extractc, ROUNDING); - simd_insert!(a, 0, r) +pub fn _mm_fnmsub_round_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + let r = vfmaddsh(-extracta, extractb, -extractc, ROUNDING); + simd_insert!(a, 0, r) + } } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6801,20 +7000,22 @@ pub unsafe fn _mm_fnmsub_round_sh( #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fnmsub_round_sh( +pub fn _mm_mask_fnmsub_round_sh( a: __m128h, k: __mmask8, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f16 = simd_extract!(a, 0); - if k & 1 != 0 { - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f16 = simd_extract!(a, 0); + if k & 1 != 0 { + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmsub = vfmaddsh(-fnmsub, extractb, -extractc, ROUNDING); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6836,20 +7037,22 @@ pub unsafe fn _mm_mask_fnmsub_round_sh( #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fnmsub_round_sh( +pub fn _mm_mask3_fnmsub_round_sh( a: __m128h, b: __m128h, c: __m128h, k: __mmask8, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f16 = simd_extract!(c, 0); - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f16 = simd_extract!(c, 0); + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + fnmsub = vfmaddsh(-extracta, extractb, -fnmsub, ROUNDING); + } + simd_insert!(c, 0, fnmsub) } - simd_insert!(c, 0, fnmsub) } /// Multiply the lower half-precision (16-bit) floating-point elements in a and b, and subtract the intermediate @@ -6871,21 +7074,23 @@ 
pub unsafe fn _mm_mask3_fnmsub_round_sh( #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fnmsub_round_sh( +pub fn _mm_maskz_fnmsub_round_sh( k: __mmask8, a: __m128h, b: __m128h, c: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - let mut fnmsub: f16 = 0.0; - if k & 1 != 0 { - let extracta: f16 = simd_extract!(a, 0); - let extractb: f16 = simd_extract!(b, 0); - let extractc: f16 = simd_extract!(c, 0); - fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING); + unsafe { + static_assert_rounding!(ROUNDING); + let mut fnmsub: f16 = 0.0; + if k & 1 != 0 { + let extracta: f16 = simd_extract!(a, 0); + let extractb: f16 = simd_extract!(b, 0); + let extractc: f16 = simd_extract!(c, 0); + fnmsub = vfmaddsh(-extracta, extractb, -extractc, ROUNDING); + } + simd_insert!(a, 0, fnmsub) } - simd_insert!(a, 0, fnmsub) } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6896,8 +7101,8 @@ pub unsafe fn _mm_maskz_fnmsub_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - vfmaddsubph_128(a, b, c) +pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { vfmaddsubph_128(a, b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6909,8 +7114,8 @@ pub unsafe fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) +pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6922,8 +7127,8 @@ pub unsafe fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m12 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) +pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6935,8 +7140,8 @@ pub unsafe fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mma #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) +pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), 
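// A scalar model of the fmaddsub lane pattern used by the intrinsics in this hunk
// (f32 stands in for the unstable `f16`, fused rounding is ignored, names are
// hypothetical): even-indexed lanes compute a*b - c, odd-indexed lanes a*b + c.
// The fmsubadd family further down swaps the two cases.
fn fmaddsub_model(a: [f32; 8], b: [f32; 8], c: [f32; 8]) -> [f32; 8] {
    core::array::from_fn(|i| {
        if i % 2 == 0 { a[i] * b[i] - c[i] } else { a[i] * b[i] + c[i] }
    })
}

fn main() {
    let r = fmaddsub_model([1.0; 8], [2.0; 8], [0.5; 8]);
    assert_eq!(r[0], 1.5); // even lane: 1*2 - 0.5
    assert_eq!(r[1], 2.5); // odd lane:  1*2 + 0.5
}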
_mm_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6947,8 +7152,8 @@ pub unsafe fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m1 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - vfmaddsubph_256(a, b, c) +pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { vfmaddsubph_256(a, b, c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6960,8 +7165,8 @@ pub unsafe fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) +pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6973,13 +7178,8 @@ pub unsafe fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: _ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fmaddsub_ph( - a: __m256h, - b: __m256h, - c: __m256h, - k: __mmask16, -) -> __m256h { - simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) +pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -6991,13 +7191,8 @@ pub unsafe fn _mm256_mask3_fmaddsub_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fmaddsub_ph( - k: __mmask16, - a: __m256h, - b: __m256h, - c: __m256h, -) -> __m256h { - simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) +pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7008,7 +7203,7 @@ pub unsafe fn _mm256_maskz_fmaddsub_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { _mm512_fmaddsub_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) } @@ -7021,8 +7216,8 @@ pub unsafe fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, 
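// The three masked forms differ only in where masked-off lanes come from. A small
// model of `simd_select_bitmask` over plain arrays (f32 instead of `f16`,
// hypothetical names): bit i of the mask keeps the computed lane, otherwise the
// fallback lane is used — `a` for mask_, `c` for mask3_, zero for maskz_.
fn select_bitmask(k: u32, computed: [f32; 8], fallback: [f32; 8]) -> [f32; 8] {
    core::array::from_fn(|i| if (k >> i) & 1 != 0 { computed[i] } else { fallback[i] })
}

fn main() {
    let r = select_bitmask(0b0000_0101, [1.0; 8], [0.0; 8]); // lanes 0 and 2 selected
    assert_eq!(r, [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
}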
b: __m512h, c: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) +pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7034,13 +7229,8 @@ pub unsafe fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmaddsub_ph( - a: __m512h, - b: __m512h, - c: __m512h, - k: __mmask32, -) -> __m512h { - simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) +pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7052,13 +7242,8 @@ pub unsafe fn _mm512_mask3_fmaddsub_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmaddsub_ph( - k: __mmask32, - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { - simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) +pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7078,13 +7263,15 @@ pub unsafe fn _mm512_maskz_fmaddsub_ph( #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmaddsub_round_ph( +pub fn _mm512_fmaddsub_round_ph( a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - vfmaddsubph_512(a, b, c, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddsubph_512(a, b, c, ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7105,14 +7292,16 @@ pub unsafe fn _mm512_fmaddsub_round_ph( #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmaddsub_round_ph( +pub fn _mm512_mask_fmaddsub_round_ph( a: __m512h, k: __mmask32, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmaddsub_round_ph::(a, b, c), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmaddsub_round_ph::(a, b, c), a) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7133,14 +7322,16 @@ pub unsafe fn _mm512_mask_fmaddsub_round_ph( #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmaddsub_round_ph( +pub fn _mm512_mask3_fmaddsub_round_ph( a: __m512h, b: __m512h, c: __m512h, k: __mmask32, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmaddsub_round_ph::(a, b, c), c) + unsafe { + 
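// The ROUNDING parameter threaded through the *_round_* functions here is a const
// generic that `static_assert_rounding!` restricts to a handful of encodings. A
// sketch of that constraint using the stable rounding-control constants (the
// `ROUNDING = 8` in the assert_instr attributes is
// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC).
use std::arch::x86_64::{
    _MM_FROUND_CUR_DIRECTION, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT,
    _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_ZERO,
};

fn is_valid_rounding(r: i32) -> bool {
    r == _MM_FROUND_CUR_DIRECTION
        || r == (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
        || r == (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
        || r == (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
        || r == (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
}

fn main() {
    assert!(is_valid_rounding(_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); // == 8
    assert!(!is_valid_rounding(_MM_FROUND_TO_NEAREST_INT)); // a bare direction is rejected
}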
static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmaddsub_round_ph::(a, b, c), c) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively add and @@ -7161,18 +7352,20 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_ph( #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmaddsub_round_ph( +pub fn _mm512_maskz_fmaddsub_round_ph( k: __mmask32, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask( - k, - _mm512_fmaddsub_round_ph::(a, b, c), - _mm512_setzero_ph(), - ) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fmaddsub_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7183,8 +7376,8 @@ pub unsafe fn _mm512_maskz_fmaddsub_round_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { - vfmaddsubph_128(a, b, simd_neg(c)) +pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { vfmaddsubph_128(a, b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7196,8 +7389,8 @@ pub unsafe fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) +pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7209,8 +7402,8 @@ pub unsafe fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m12 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { - simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) +pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7222,8 +7415,8 @@ pub unsafe fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mma #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) +pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point 
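// The shape of the change applied throughout this hunk, shown on a hypothetical
// wrapper rather than a stdarch function (raw_kernel / safe_wrapper are made-up
// names and "avx2" is only a placeholder feature), assuming a toolchain where
// target_feature 1.1 is stable: the public function becomes safe, the `unsafe`
// block moves inside it, and a caller without the feature statically enabled
// still goes through runtime detection plus an `unsafe` call.
unsafe fn raw_kernel(x: u32) -> u32 {
    // Stand-in for an unsafe intrinsic binding.
    x.wrapping_mul(3)
}

#[target_feature(enable = "avx2")]
fn safe_wrapper(x: u32) -> u32 {
    // SAFETY: the enclosing `#[target_feature]` guarantees the feature is available here.
    unsafe { raw_kernel(x) }
}

fn main() {
    if is_x86_feature_detected!("avx2") {
        // Calling a safe `#[target_feature]` fn from code without that feature is
        // still an unsafe operation; the detection above is what justifies it.
        let y = unsafe { safe_wrapper(7) };
        assert_eq!(y, 21);
    }
}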
elements in a and b, alternatively subtract @@ -7234,8 +7427,8 @@ pub unsafe fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m1 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { - vfmaddsubph_256(a, b, simd_neg(c)) +pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { vfmaddsubph_256(a, b, simd_neg(c)) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7247,8 +7440,8 @@ pub unsafe fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) +pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7260,13 +7453,8 @@ pub unsafe fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: _ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask3_fmsubadd_ph( - a: __m256h, - b: __m256h, - c: __m256h, - k: __mmask16, -) -> __m256h { - simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) +pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7278,13 +7466,8 @@ pub unsafe fn _mm256_mask3_fmsubadd_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_fmsubadd_ph( - k: __mmask16, - a: __m256h, - b: __m256h, - c: __m256h, -) -> __m256h { - simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) +pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7295,7 +7478,7 @@ pub unsafe fn _mm256_maskz_fmsubadd_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { +pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h { _mm512_fmsubadd_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b, c) } @@ -7308,8 +7491,8 @@ pub unsafe fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { - 
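// The mask type tracks the lane count of the half-precision vector at each width
// used here: __m128h has 8 f16 lanes (__mmask8), __m256h has 16 (__mmask16),
// __m512h has 32 (__mmask32). A quick size check, with 2-byte lanes standing in
// for the still-unstable `f16` type:
fn f16_lanes(vector_bytes: usize) -> usize {
    vector_bytes / 2
}

fn main() {
    assert_eq!(f16_lanes(16), 8);  // __m128h -> __mmask8
    assert_eq!(f16_lanes(32), 16); // __m256h -> __mmask16
    assert_eq!(f16_lanes(64), 32); // __m512h -> __mmask32
}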
simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) +pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7321,13 +7504,8 @@ pub unsafe fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmsubadd_ph( - a: __m512h, - b: __m512h, - c: __m512h, - k: __mmask32, -) -> __m512h { - simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) +pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7339,13 +7517,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmsubadd_ph( - k: __mmask32, - a: __m512h, - b: __m512h, - c: __m512h, -) -> __m512h { - simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) +pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7365,13 +7538,15 @@ pub unsafe fn _mm512_maskz_fmsubadd_ph( #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fmsubadd_round_ph( +pub fn _mm512_fmsubadd_round_ph( a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - vfmaddsubph_512(a, b, simd_neg(c), ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vfmaddsubph_512(a, b, simd_neg(c), ROUNDING) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7392,14 +7567,16 @@ pub unsafe fn _mm512_fmsubadd_round_ph( #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fmsubadd_round_ph( +pub fn _mm512_mask_fmsubadd_round_ph( a: __m512h, k: __mmask32, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmsubadd_round_ph::(a, b, c), a) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmsubadd_round_ph::(a, b, c), a) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7420,14 +7597,16 @@ pub unsafe fn _mm512_mask_fmsubadd_round_ph( #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask3_fmsubadd_round_ph( +pub fn _mm512_mask3_fmsubadd_round_ph( a: __m512h, b: __m512h, c: __m512h, k: __mmask32, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_fmsubadd_round_ph::(a, b, c), c) + unsafe { + 
static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_fmsubadd_round_ph::(a, b, c), c) + } } /// Multiply packed half-precision (16-bit) floating-point elements in a and b, alternatively subtract @@ -7448,18 +7627,20 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_ph( #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_fmsubadd_round_ph( +pub fn _mm512_maskz_fmsubadd_round_ph( k: __mmask32, a: __m512h, b: __m512h, c: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask( - k, - _mm512_fmsubadd_round_ph::(a, b, c), - _mm512_setzero_ph(), - ) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask( + k, + _mm512_fmsubadd_round_ph::(a, b, c), + _mm512_setzero_ph(), + ) + } } /// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst`. @@ -7470,7 +7651,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_rcp_ph(a: __m128h) -> __m128h { +pub fn _mm_rcp_ph(a: __m128h) -> __m128h { _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a) } @@ -7483,8 +7664,8 @@ pub unsafe fn _mm_rcp_ph(a: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { - vrcpph_128(a, src, k) +pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { vrcpph_128(a, src, k) } } /// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst` @@ -7496,7 +7677,7 @@ pub unsafe fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h { +pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h { _mm_mask_rcp_ph(_mm_setzero_ph(), k, a) } @@ -7508,7 +7689,7 @@ pub unsafe fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_rcp_ph(a: __m256h) -> __m256h { +pub fn _mm256_rcp_ph(a: __m256h) -> __m256h { _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a) } @@ -7521,8 +7702,8 @@ pub unsafe fn _mm256_rcp_ph(a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { - vrcpph_256(a, src, k) +pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { vrcpph_256(a, src, k) } } /// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst` @@ -7534,7 +7715,7 @@ pub unsafe fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = 
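// The rcp family below returns a fast approximate reciprocal rather than an
// IEEE-exact 1/x. A common caller-side pattern, shown as an f32 sketch with a
// hypothetical name, is one Newton-Raphson step to tighten the estimate:
// x1 = x0 * (2 - a * x0).
fn refine_recip(a: f32, approx: f32) -> f32 {
    approx * (2.0 - a * approx)
}

fn main() {
    let a = 3.0_f32;
    let rough = 0.33_f32; // stand-in for the hardware estimate of 1/3
    let better = refine_recip(a, rough);
    assert!((better - 1.0 / a).abs() < (rough - 1.0 / a).abs());
}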
"stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h { +pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h { _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a) } @@ -7546,7 +7727,7 @@ pub unsafe fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_rcp_ph(a: __m512h) -> __m512h { +pub fn _mm512_rcp_ph(a: __m512h) -> __m512h { _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a) } @@ -7559,8 +7740,8 @@ pub unsafe fn _mm512_rcp_ph(a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { - vrcpph_512(a, src, k) +pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { + unsafe { vrcpph_512(a, src, k) } } /// Compute the approximate reciprocal of packed 16-bit floating-point elements in `a` and stores the results in `dst` @@ -7572,7 +7753,7 @@ pub unsafe fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrcpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h { _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a) } @@ -7586,7 +7767,7 @@ pub unsafe fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrcpsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_rcp_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -7600,8 +7781,8 @@ pub unsafe fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrcpsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - vrcpsh(a, b, src, k) +pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { vrcpsh(a, b, src, k) } } /// Compute the approximate reciprocal of the lower half-precision (16-bit) floating-point element in b, @@ -7614,7 +7795,7 @@ pub unsafe fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrcpsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_rcp_sh(_mm_setzero_ph(), k, a, b) } @@ -7627,7 +7808,7 @@ pub unsafe fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_rsqrt_ph(a: __m128h) -> __m128h { +pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h { _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a) } @@ -7641,8 +7822,8 @@ pub unsafe fn _mm_rsqrt_ph(a: __m128h) 
-> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { - vrsqrtph_128(a, src, k) +pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { vrsqrtph_128(a, src, k) } } /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point @@ -7655,7 +7836,7 @@ pub unsafe fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h { +pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h { _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a) } @@ -7668,7 +7849,7 @@ pub unsafe fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_rsqrt_ph(a: __m256h) -> __m256h { +pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h { _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a) } @@ -7682,8 +7863,8 @@ pub unsafe fn _mm256_rsqrt_ph(a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { - vrsqrtph_256(a, src, k) +pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { vrsqrtph_256(a, src, k) } } /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point @@ -7696,7 +7877,7 @@ pub unsafe fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h { +pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h { _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a) } @@ -7709,7 +7890,7 @@ pub unsafe fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { +pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a) } @@ -7723,8 +7904,8 @@ pub unsafe fn _mm512_rsqrt_ph(a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { - vrsqrtph_512(a, src, k) +pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { + unsafe { vrsqrtph_512(a, src, k) } } /// Compute the approximate reciprocal square root of packed half-precision (16-bit) floating-point @@ -7737,7 +7918,7 @@ pub unsafe fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrsqrtph))] 
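// The same idea applies to the rsqrt family above: the hardware produces an
// approximation of 1/sqrt(x), and one Newton-Raphson step improves it
// (f32 sketch, hypothetical name): y1 = y0 * (1.5 - 0.5 * a * y0 * y0).
fn refine_rsqrt(a: f32, approx: f32) -> f32 {
    approx * (1.5 - 0.5 * a * approx * approx)
}

fn main() {
    let a = 2.0_f32;
    let rough = 0.7_f32; // stand-in for the hardware estimate of 1/sqrt(2)
    let exact = 1.0 / a.sqrt();
    let better = refine_rsqrt(a, rough);
    assert!((better - exact).abs() < (rough - exact).abs());
}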
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a) } @@ -7751,7 +7932,7 @@ pub unsafe fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrsqrtsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_rsqrt_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -7765,8 +7946,8 @@ pub unsafe fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrsqrtsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - vrsqrtsh(a, b, src, k) +pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { vrsqrtsh(a, b, src, k) } } /// Compute the approximate reciprocal square root of the lower half-precision (16-bit) floating-point @@ -7779,7 +7960,7 @@ pub unsafe fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vrsqrtsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_rsqrt_sh(_mm_setzero_ph(), k, a, b) } @@ -7791,8 +7972,8 @@ pub unsafe fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_sqrt_ph(a: __m128h) -> __m128h { - simd_fsqrt(a) +pub fn _mm_sqrt_ph(a: __m128h) -> __m128h { + unsafe { simd_fsqrt(a) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7803,8 +7984,8 @@ pub unsafe fn _mm_sqrt_ph(a: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_sqrt_ph(a), src) +pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7815,8 +7996,8 @@ pub unsafe fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) +pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7827,8 +8008,8 @@ pub unsafe fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> 
__m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_sqrt_ph(a: __m256h) -> __m256h { - simd_fsqrt(a) +pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h { + unsafe { simd_fsqrt(a) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7839,8 +8020,8 @@ pub unsafe fn _mm256_sqrt_ph(a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_sqrt_ph(a), src) +pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7851,8 +8032,8 @@ pub unsafe fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) +pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7863,8 +8044,8 @@ pub unsafe fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_sqrt_ph(a: __m512h) -> __m512h { - simd_fsqrt(a) +pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h { + unsafe { simd_fsqrt(a) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7875,8 +8056,8 @@ pub unsafe fn _mm512_sqrt_ph(a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_sqrt_ph(a), src) +pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7887,8 +8068,8 @@ pub unsafe fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsqrtph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) +pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7907,9 +8088,11 @@ pub unsafe fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h { 
#[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_sqrt_round_ph(a: __m512h) -> __m512h { - static_assert_rounding!(ROUNDING); - vsqrtph_512(a, ROUNDING) +pub fn _mm512_sqrt_round_ph(a: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtph_512(a, ROUNDING) + } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7928,13 +8111,15 @@ pub unsafe fn _mm512_sqrt_round_ph(a: __m512h) -> __m512h { #[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_sqrt_round_ph( +pub fn _mm512_mask_sqrt_round_ph( src: __m512h, k: __mmask32, a: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_sqrt_round_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_sqrt_round_ph::(a), src) + } } /// Compute the square root of packed half-precision (16-bit) floating-point elements in a, and store the @@ -7953,9 +8138,11 @@ pub unsafe fn _mm512_mask_sqrt_round_ph( #[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_sqrt_round_ph(k: __mmask32, a: __m512h) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_sqrt_round_ph::(a), _mm512_setzero_ph()) +pub fn _mm512_maskz_sqrt_round_ph(k: __mmask32, a: __m512h) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_sqrt_round_ph::(a), _mm512_setzero_ph()) + } } /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store @@ -7967,7 +8154,7 @@ pub unsafe fn _mm512_maskz_sqrt_round_ph(k: __mmask32, a: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsqrtsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_sqrt_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -7980,7 +8167,7 @@ pub unsafe fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsqrtsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -7993,7 +8180,7 @@ pub unsafe fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vsqrtsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_sqrt_sh(_mm_setzero_ph(), k, a, b) } @@ -8014,7 +8201,7 @@ pub unsafe fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_sqrt_round_sh(a: 
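// A model of the scalar (`_sh`) convention used by _mm_sqrt_sh and friends above
// (f32 over 8 lanes in place of `f16`, hypothetical name): only lane 0 is
// computed — from `b` — and the remaining lanes are copied unchanged from `a`.
fn sqrt_sh_model(a: [f32; 8], b: [f32; 8]) -> [f32; 8] {
    let mut r = a;
    r[0] = b[0].sqrt();
    r
}

fn main() {
    let r = sqrt_sh_model([9.0; 8], [4.0; 8]);
    assert_eq!(r[0], 2.0);        // sqrt of b's lane 0
    assert_eq!(r[1..], [9.0; 7]); // upper lanes pass through from a
}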
__m128h, b: __m128h) -> __m128h { +pub fn _mm_sqrt_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_sqrt_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -8036,14 +8223,16 @@ pub unsafe fn _mm_sqrt_round_sh(a: __m128h, b: __m128h) -> #[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_sqrt_round_sh( +pub fn _mm_mask_sqrt_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vsqrtsh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vsqrtsh(a, b, src, k, ROUNDING) + } } /// Compute the square root of the lower half-precision (16-bit) floating-point element in b, store @@ -8063,7 +8252,7 @@ pub unsafe fn _mm_mask_sqrt_round_sh( #[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_sqrt_round_sh( +pub fn _mm_maskz_sqrt_round_sh( k: __mmask8, a: __m128h, b: __m128h, @@ -8081,8 +8270,8 @@ pub unsafe fn _mm_maskz_sqrt_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h { - vmaxph_128(a, b) +pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { vmaxph_128(a, b) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8095,8 +8284,8 @@ pub unsafe fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_max_ph(a, b), src) +pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8109,8 +8298,8 @@ pub unsafe fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) +pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8122,8 +8311,8 @@ pub unsafe fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h { - vmaxph_256(a, b) +pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { vmaxph_256(a, b) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8136,8 +8325,8 @@ pub unsafe fn _mm256_max_ph(a: __m256h, b: __m256h) -> 
__m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_max_ph(a, b), src) +pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8150,8 +8339,8 @@ pub unsafe fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) +pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8163,7 +8352,7 @@ pub unsafe fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h { _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) } @@ -8177,8 +8366,8 @@ pub unsafe fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_max_ph(a, b), src) +pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8191,8 +8380,8 @@ pub unsafe fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vmaxph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) +pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8206,9 +8395,11 @@ pub unsafe fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vmaxph, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_max_round_ph(a: __m512h, b: __m512h) -> __m512h { - static_assert_sae!(SAE); - vmaxph_512(a, b, SAE) +pub fn _mm512_max_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_sae!(SAE); + vmaxph_512(a, b, SAE) + } } /// Compare packed half-precision (16-bit) 
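// The SAE parameter on the *_round_* max/min functions carries no rounding
// direction (max/min never round); it either leaves exceptions alone or
// suppresses them. A sketch of the constraint enforced by `static_assert_sae!`,
// using the stable constants:
use std::arch::x86_64::{_MM_FROUND_CUR_DIRECTION, _MM_FROUND_NO_EXC};

fn is_valid_sae(sae: i32) -> bool {
    sae == _MM_FROUND_CUR_DIRECTION || sae == _MM_FROUND_NO_EXC
}

fn main() {
    assert!(is_valid_sae(_MM_FROUND_NO_EXC)); // the `SAE = 8` in the assert_instr attributes
    assert!(!is_valid_sae(0));
}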
floating-point elements in a and b, and store packed maximum @@ -8222,14 +8413,16 @@ pub unsafe fn _mm512_max_round_ph(a: __m512h, b: __m512h) -> __m #[cfg_attr(test, assert_instr(vmaxph, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_max_round_ph( +pub fn _mm512_mask_max_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_sae!(SAE); - simd_select_bitmask(k, _mm512_max_round_ph::(a, b), src) + unsafe { + static_assert_sae!(SAE); + simd_select_bitmask(k, _mm512_max_round_ph::(a, b), src) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed maximum @@ -8243,13 +8436,11 @@ pub unsafe fn _mm512_mask_max_round_ph( #[cfg_attr(test, assert_instr(vmaxph, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_max_round_ph( - k: __mmask32, - a: __m512h, - b: __m512h, -) -> __m512h { - static_assert_sae!(SAE); - simd_select_bitmask(k, _mm512_max_round_ph::(a, b), _mm512_setzero_ph()) +pub fn _mm512_maskz_max_round_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_sae!(SAE); + simd_select_bitmask(k, _mm512_max_round_ph::(a, b), _mm512_setzero_ph()) + } } /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum @@ -8262,7 +8453,7 @@ pub unsafe fn _mm512_maskz_max_round_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -8276,7 +8467,7 @@ pub unsafe fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -8290,7 +8481,7 @@ pub unsafe fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vmaxsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_max_sh(_mm_setzero_ph(), k, a, b) } @@ -8305,7 +8496,7 @@ pub unsafe fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_max_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_max_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_sae!(SAE); _mm_mask_max_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -8322,14 +8513,16 @@ pub unsafe fn _mm_max_round_sh(a: __m128h, b: __m128h) -> __m128 #[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_max_round_sh( +pub fn 
_mm_mask_max_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_sae!(SAE); - vmaxsh(a, b, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vmaxsh(a, b, src, k, SAE) + } } /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the maximum value @@ -8344,11 +8537,7 @@ pub unsafe fn _mm_mask_max_round_sh( #[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_max_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_max_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { static_assert_sae!(SAE); _mm_mask_max_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -8362,8 +8551,8 @@ pub unsafe fn _mm_maskz_max_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { - vminph_128(a, b) +pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { + unsafe { vminph_128(a, b) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8376,8 +8565,8 @@ pub unsafe fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_min_ph(a, b), src) +pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8390,8 +8579,8 @@ pub unsafe fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) +pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8403,8 +8592,8 @@ pub unsafe fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { - vminph_256(a, b) +pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { + unsafe { vminph_256(a, b) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8417,8 +8606,8 @@ pub unsafe fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_min_ph(a, b), src) +pub fn _mm256_mask_min_ph(src: __m256h, k: 
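// A typical use of the paired min/max operations in this section is a clamp,
// sketched here as a scalar f32 model with a hypothetical name:
// clamp(x, lo, hi) = min(max(x, lo), hi). Note that the NaN handling of the
// hardware min/max instructions differs from f32::min/max, so this only
// illustrates the finite-value behaviour.
fn clamp_model(x: f32, lo: f32, hi: f32) -> f32 {
    x.max(lo).min(hi)
}

fn main() {
    assert_eq!(clamp_model(2.5, 0.0, 1.0), 1.0);
    assert_eq!(clamp_model(-2.5, 0.0, 1.0), 0.0);
    assert_eq!(clamp_model(0.25, 0.0, 1.0), 0.25);
}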
__mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8431,8 +8620,8 @@ pub unsafe fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m2 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) +pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8444,7 +8633,7 @@ pub unsafe fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m25 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b) } @@ -8458,8 +8647,8 @@ pub unsafe fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_min_ph(a, b), src) +pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8472,8 +8661,8 @@ pub unsafe fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vminph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) +pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8486,9 +8675,11 @@ pub unsafe fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vminph, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_min_round_ph(a: __m512h, b: __m512h) -> __m512h { - static_assert_sae!(SAE); - vminph_512(a, b, SAE) +pub fn _mm512_min_round_ph(a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_sae!(SAE); + vminph_512(a, b, SAE) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8502,14 +8693,16 @@ pub unsafe fn _mm512_min_round_ph(a: __m512h, b: __m512h) -> __m #[cfg_attr(test, assert_instr(vminph, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_min_round_ph( +pub fn 
_mm512_mask_min_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_sae!(SAE); - simd_select_bitmask(k, _mm512_min_round_ph::(a, b), src) + unsafe { + static_assert_sae!(SAE); + simd_select_bitmask(k, _mm512_min_round_ph::(a, b), src) + } } /// Compare packed half-precision (16-bit) floating-point elements in a and b, and store packed minimum @@ -8523,13 +8716,11 @@ pub unsafe fn _mm512_mask_min_round_ph( #[cfg_attr(test, assert_instr(vminph, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_min_round_ph( - k: __mmask32, - a: __m512h, - b: __m512h, -) -> __m512h { - static_assert_sae!(SAE); - simd_select_bitmask(k, _mm512_min_round_ph::(a, b), _mm512_setzero_ph()) +pub fn _mm512_maskz_min_round_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { + static_assert_sae!(SAE); + simd_select_bitmask(k, _mm512_min_round_ph::(a, b), _mm512_setzero_ph()) + } } /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum @@ -8542,7 +8733,7 @@ pub unsafe fn _mm512_maskz_min_round_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -8556,7 +8747,7 @@ pub unsafe fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -8570,7 +8761,7 @@ pub unsafe fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vminsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_min_sh(_mm_setzero_ph(), k, a, b) } @@ -8585,7 +8776,7 @@ pub unsafe fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vminsh, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_min_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_min_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_sae!(SAE); _mm_mask_min_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -8602,14 +8793,16 @@ pub unsafe fn _mm_min_round_sh(a: __m128h, b: __m128h) -> __m128 #[cfg_attr(test, assert_instr(vminsh, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_min_round_sh( +pub fn _mm_mask_min_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_sae!(SAE); - vminsh(a, b, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vminsh(a, b, src, k, SAE) + } } /// Compare the lower half-precision (16-bit) floating-point elements in a and b, store the minimum value @@ -8624,11 +8817,7 @@ pub unsafe fn 
_mm_mask_min_round_sh( #[cfg_attr(test, assert_instr(vminsh, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_min_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_min_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { static_assert_sae!(SAE); _mm_mask_min_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -8642,7 +8831,7 @@ pub unsafe fn _mm_maskz_min_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_getexp_ph(a: __m128h) -> __m128h { +pub fn _mm_getexp_ph(a: __m128h) -> __m128h { _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a) } @@ -8656,8 +8845,8 @@ pub unsafe fn _mm_getexp_ph(a: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { - vgetexpph_128(a, src, k) +pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { vgetexpph_128(a, src, k) } } /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision @@ -8670,7 +8859,7 @@ pub unsafe fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m12 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { +pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { _mm_mask_getexp_ph(_mm_setzero_ph(), k, a) } @@ -8683,7 +8872,7 @@ pub unsafe fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_getexp_ph(a: __m256h) -> __m256h { +pub fn _mm256_getexp_ph(a: __m256h) -> __m256h { _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a) } @@ -8697,8 +8886,8 @@ pub unsafe fn _mm256_getexp_ph(a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { - vgetexpph_256(a, src, k) +pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { vgetexpph_256(a, src, k) } } /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision @@ -8711,7 +8900,7 @@ pub unsafe fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> _ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { +pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a) } @@ -8724,7 +8913,7 @@ pub unsafe fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn 
_mm512_getexp_ph(a: __m512h) -> __m512h { +pub fn _mm512_getexp_ph(a: __m512h) -> __m512h { _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a) } @@ -8738,7 +8927,7 @@ pub unsafe fn _mm512_getexp_ph(a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a) } @@ -8752,7 +8941,7 @@ pub unsafe fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetexpph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a) } @@ -8767,7 +8956,7 @@ pub unsafe fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h { #[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_getexp_round_ph(a: __m512h) -> __m512h { +pub fn _mm512_getexp_round_ph(a: __m512h) -> __m512h { static_assert_sae!(SAE); _mm512_mask_getexp_round_ph::(_mm512_undefined_ph(), 0xffffffff, a) } @@ -8783,13 +8972,15 @@ pub unsafe fn _mm512_getexp_round_ph(a: __m512h) -> __m512h { #[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_getexp_round_ph( +pub fn _mm512_mask_getexp_round_ph( src: __m512h, k: __mmask32, a: __m512h, ) -> __m512h { - static_assert_sae!(SAE); - vgetexpph_512(a, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vgetexpph_512(a, src, k, SAE) + } } /// Convert the exponent of each packed half-precision (16-bit) floating-point element in a to a half-precision @@ -8803,7 +8994,7 @@ pub unsafe fn _mm512_mask_getexp_round_ph( #[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_getexp_round_ph(k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_maskz_getexp_round_ph(k: __mmask32, a: __m512h) -> __m512h { static_assert_sae!(SAE); _mm512_mask_getexp_round_ph::(_mm512_setzero_ph(), k, a) } @@ -8818,7 +9009,7 @@ pub unsafe fn _mm512_maskz_getexp_round_ph(k: __mmask32, a: __m5 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetexpsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_getexp_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -8833,7 +9024,7 @@ pub unsafe fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetexpsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -8848,7 +9039,7 @@ 
pub unsafe fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m12 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetexpsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_getexp_sh(_mm_setzero_ph(), k, a, b) } @@ -8864,7 +9055,7 @@ pub unsafe fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128 #[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_getexp_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_getexp_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_sae!(SAE); _mm_mask_getexp_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -8881,14 +9072,16 @@ pub unsafe fn _mm_getexp_round_sh(a: __m128h, b: __m128h) -> __m #[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_getexp_round_sh( +pub fn _mm_mask_getexp_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_sae!(SAE); - vgetexpsh(a, b, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vgetexpsh(a, b, src, k, SAE) + } } /// Convert the exponent of the lower half-precision (16-bit) floating-point element in b to a half-precision @@ -8903,11 +9096,7 @@ pub unsafe fn _mm_mask_getexp_round_sh( #[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_getexp_round_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_getexp_round_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { static_assert_sae!(SAE); _mm_mask_getexp_round_sh::(_mm_setzero_ph(), k, a, b) } @@ -8935,10 +9124,7 @@ pub unsafe fn _mm_maskz_getexp_round_sh( #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_getmant_ph< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm_getmant_ph( a: __m128h, ) -> __m128h { static_assert_uimm_bits!(NORM, 4); @@ -8970,7 +9156,7 @@ pub unsafe fn _mm_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_getmant_ph< +pub fn _mm_mask_getmant_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -8978,9 +9164,11 @@ pub unsafe fn _mm_mask_getmant_ph< k: __mmask8, a: __m128h, ) -> __m128h { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - vgetmantph_128(a, (SIGN << 2) | NORM, src, k) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + vgetmantph_128(a, (SIGN << 2) | NORM, src, k) + } } /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store @@ -9007,7 +9195,7 @@ pub unsafe fn _mm_mask_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_getmant_ph< +pub fn 
_mm_maskz_getmant_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9042,10 +9230,7 @@ pub unsafe fn _mm_maskz_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_getmant_ph< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm256_getmant_ph( a: __m256h, ) -> __m256h { static_assert_uimm_bits!(NORM, 4); @@ -9077,7 +9262,7 @@ pub unsafe fn _mm256_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_getmant_ph< +pub fn _mm256_mask_getmant_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9085,9 +9270,11 @@ pub unsafe fn _mm256_mask_getmant_ph< k: __mmask16, a: __m256h, ) -> __m256h { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - vgetmantph_256(a, (SIGN << 2) | NORM, src, k) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + vgetmantph_256(a, (SIGN << 2) | NORM, src, k) + } } /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store @@ -9114,7 +9301,7 @@ pub unsafe fn _mm256_mask_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_getmant_ph< +pub fn _mm256_maskz_getmant_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9149,10 +9336,7 @@ pub unsafe fn _mm256_maskz_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_getmant_ph< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +pub fn _mm512_getmant_ph( a: __m512h, ) -> __m512h { static_assert_uimm_bits!(NORM, 4); @@ -9184,7 +9368,7 @@ pub unsafe fn _mm512_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_getmant_ph< +pub fn _mm512_mask_getmant_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9221,7 +9405,7 @@ pub unsafe fn _mm512_mask_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_getmant_ph< +pub fn _mm512_maskz_getmant_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9259,7 +9443,7 @@ pub unsafe fn _mm512_maskz_getmant_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_getmant_round_ph< +pub fn _mm512_getmant_round_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9299,7 +9483,7 @@ pub unsafe fn _mm512_getmant_round_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4, 5)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub 
unsafe fn _mm512_mask_getmant_round_ph< +pub fn _mm512_mask_getmant_round_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9308,10 +9492,12 @@ pub unsafe fn _mm512_mask_getmant_round_ph< k: __mmask32, a: __m512h, ) -> __m512h { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_sae!(SAE); - vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + vgetmantph_512(a, (SIGN << 2) | NORM, src, k, SAE) + } } /// Normalize the mantissas of packed half-precision (16-bit) floating-point elements in a, and store @@ -9341,7 +9527,7 @@ pub unsafe fn _mm512_mask_getmant_round_ph< #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_getmant_round_ph< +pub fn _mm512_maskz_getmant_round_ph< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9378,11 +9564,8 @@ pub unsafe fn _mm512_maskz_getmant_round_ph< #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(2, 3)] -#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_getmant_sh< - const NORM: _MM_MANTISSA_NORM_ENUM, - const SIGN: _MM_MANTISSA_SIGN_ENUM, ->( +#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] +pub fn _mm_getmant_sh( a: __m128h, b: __m128h, ) -> __m128h { @@ -9416,7 +9599,7 @@ pub unsafe fn _mm_getmant_sh< #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_getmant_sh< +pub fn _mm_mask_getmant_sh< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9455,7 +9638,7 @@ pub unsafe fn _mm_mask_getmant_sh< #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_getmant_sh< +pub fn _mm_maskz_getmant_sh< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, >( @@ -9495,7 +9678,7 @@ pub unsafe fn _mm_maskz_getmant_sh< #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_getmant_round_sh< +pub fn _mm_getmant_round_sh< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9536,7 +9719,7 @@ pub unsafe fn _mm_getmant_round_sh< #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5, 6)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_getmant_round_sh< +pub fn _mm_mask_getmant_round_sh< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9546,10 +9729,12 @@ pub unsafe fn _mm_mask_getmant_round_sh< a: __m128h, b: __m128h, ) -> __m128h { - static_assert_uimm_bits!(NORM, 4); - static_assert_uimm_bits!(SIGN, 2); - static_assert_sae!(SAE); - vgetmantsh(a, b, (SIGN << 2) | NORM, src, k, SAE) + unsafe { + static_assert_uimm_bits!(NORM, 4); + static_assert_uimm_bits!(SIGN, 2); + static_assert_sae!(SAE); + vgetmantsh(a, b, (SIGN << 
2) | NORM, src, k, SAE) + } } /// Normalize the mantissas of the lower half-precision (16-bit) floating-point element in b, store @@ -9579,7 +9764,7 @@ pub unsafe fn _mm_mask_getmant_round_sh< #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4, 5)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_getmant_round_sh< +pub fn _mm_maskz_getmant_round_sh< const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM, const SAE: i32, @@ -9611,7 +9796,7 @@ pub unsafe fn _mm_maskz_getmant_round_sh< #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_roundscale_ph(a: __m128h) -> __m128h { +pub fn _mm_roundscale_ph(a: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_roundscale_ph::(_mm_undefined_ph(), 0xff, a) } @@ -9634,13 +9819,11 @@ pub unsafe fn _mm_roundscale_ph(a: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_roundscale_ph( - src: __m128h, - k: __mmask8, - a: __m128h, -) -> __m128h { - static_assert_uimm_bits!(IMM8, 8); - vrndscaleph_128(a, IMM8, src, k) +pub fn _mm_mask_roundscale_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vrndscaleph_128(a, IMM8, src, k) + } } /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits @@ -9661,7 +9844,7 @@ pub unsafe fn _mm_mask_roundscale_ph( #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_roundscale_ph(k: __mmask8, a: __m128h) -> __m128h { +pub fn _mm_maskz_roundscale_ph(k: __mmask8, a: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_roundscale_ph::(_mm_setzero_ph(), k, a) } @@ -9683,7 +9866,7 @@ pub unsafe fn _mm_maskz_roundscale_ph(k: __mmask8, a: __m128h) #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_roundscale_ph(a: __m256h) -> __m256h { +pub fn _mm256_roundscale_ph(a: __m256h) -> __m256h { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_roundscale_ph::(_mm256_undefined_ph(), 0xffff, a) } @@ -9706,13 +9889,15 @@ pub unsafe fn _mm256_roundscale_ph(a: __m256h) -> __m256h { #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_roundscale_ph( +pub fn _mm256_mask_roundscale_ph( src: __m256h, k: __mmask16, a: __m256h, ) -> __m256h { - static_assert_uimm_bits!(IMM8, 8); - vrndscaleph_256(a, IMM8, src, k) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vrndscaleph_256(a, IMM8, src, k) + } } /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits @@ -9733,7 +9918,7 @@ pub unsafe fn _mm256_mask_roundscale_ph( #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_roundscale_ph(k: __mmask16, a: __m256h) -> __m256h { +pub fn _mm256_maskz_roundscale_ph(k: __mmask16, a: __m256h) -> __m256h { 
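// --- Illustrative call-site sketch (not part of the patch) ----------------
// Why: the hunks above turn `pub unsafe fn` intrinsics into safe `pub fn`s
// whose bodies wrap the raw intrinsic calls in `unsafe { .. }`. The sketch
// below shows what that buys a caller: inside a function compiled with the
// same target features, the intrinsic can now be invoked without an `unsafe`
// block. Assumptions: a nightly toolchain with the unstable
// `stdarch_x86_avx512_f16` feature enabled and an AVX-512 FP16 capable CPU;
// the helper name `round_low_half` is made up for illustration.
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn round_low_half(a: __m128h) -> __m128h {
    // Zero-mask the upper four lanes (mask 0b0000_1111) and round the low
    // four lanes to the nearest integer (IMM8 = 0 selects round-to-nearest
    // with zero extra fraction bits). No `unsafe` block is needed here any
    // more; calling `round_low_half` itself from code that does not enable
    // these features remains an unsafe operation.
    _mm_maskz_roundscale_ph::<0>(0b0000_1111, a)
}
// ---------------------------------------------------------------------------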
static_assert_uimm_bits!(IMM8, 8); _mm256_mask_roundscale_ph::(_mm256_setzero_ph(), k, a) } @@ -9755,7 +9940,7 @@ pub unsafe fn _mm256_maskz_roundscale_ph(k: __mmask16, a: __m25 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_roundscale_ph(a: __m512h) -> __m512h { +pub fn _mm512_roundscale_ph(a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_roundscale_ph::(_mm512_undefined_ph(), 0xffffffff, a) } @@ -9778,7 +9963,7 @@ pub unsafe fn _mm512_roundscale_ph(a: __m512h) -> __m512h { #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_roundscale_ph( +pub fn _mm512_mask_roundscale_ph( src: __m512h, k: __mmask32, a: __m512h, @@ -9805,7 +9990,7 @@ pub unsafe fn _mm512_mask_roundscale_ph( #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_roundscale_ph(k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_maskz_roundscale_ph(k: __mmask32, a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_roundscale_ph::(_mm512_setzero_ph(), k, a) } @@ -9828,7 +10013,7 @@ pub unsafe fn _mm512_maskz_roundscale_ph(k: __mmask32, a: __m51 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_roundscale_round_ph(a: __m512h) -> __m512h { +pub fn _mm512_roundscale_round_ph(a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm512_mask_roundscale_round_ph::(_mm512_undefined_ph(), 0xffffffff, a) @@ -9853,14 +10038,16 @@ pub unsafe fn _mm512_roundscale_round_ph(a: __m #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_roundscale_round_ph( +pub fn _mm512_mask_roundscale_round_ph( src: __m512h, k: __mmask32, a: __m512h, ) -> __m512h { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - vrndscaleph_512(a, IMM8, src, k, SAE) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + vrndscaleph_512(a, IMM8, src, k, SAE) + } } /// Round packed half-precision (16-bit) floating-point elements in a to the number of fraction bits @@ -9881,7 +10068,7 @@ pub unsafe fn _mm512_mask_roundscale_round_ph( #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_roundscale_round_ph( +pub fn _mm512_maskz_roundscale_round_ph( k: __mmask32, a: __m512h, ) -> __m512h { @@ -9908,7 +10095,7 @@ pub unsafe fn _mm512_maskz_roundscale_round_ph( #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_roundscale_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_roundscale_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_roundscale_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -9931,7 +10118,7 @@ pub unsafe fn _mm_roundscale_sh(a: __m128h, b: __m128h) -> __m1 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 
0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_roundscale_sh( +pub fn _mm_mask_roundscale_sh( src: __m128h, k: __mmask8, a: __m128h, @@ -9959,11 +10146,7 @@ pub unsafe fn _mm_mask_roundscale_sh( #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_roundscale_sh( - k: __mmask8, - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_maskz_roundscale_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_roundscale_sh::(_mm_setzero_ph(), k, a, b) } @@ -9988,10 +10171,7 @@ pub unsafe fn _mm_maskz_roundscale_sh( #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_roundscale_round_sh( - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_roundscale_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm_mask_roundscale_round_sh::(_mm_undefined_ph(), 0xff, a, b) @@ -10017,15 +10197,17 @@ pub unsafe fn _mm_roundscale_round_sh( #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_roundscale_round_sh( +pub fn _mm_mask_roundscale_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - vrndscalesh(a, b, src, k, IMM8, SAE) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + vrndscalesh(a, b, src, k, IMM8, SAE) + } } /// Round the lower half-precision (16-bit) floating-point element in b to the number of fraction bits @@ -10048,7 +10230,7 @@ pub unsafe fn _mm_mask_roundscale_round_sh( #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_roundscale_round_sh( +pub fn _mm_maskz_roundscale_round_sh( k: __mmask8, a: __m128h, b: __m128h, @@ -10066,7 +10248,7 @@ pub unsafe fn _mm_maskz_roundscale_round_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b) } @@ -10078,8 +10260,8 @@ pub unsafe fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - vscalefph_128(a, b, src, k) +pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { vscalefph_128(a, b, src, k) } } /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store @@ -10090,7 +10272,7 @@ pub unsafe fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m12 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = 
"127213")] -pub unsafe fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b) } @@ -10102,7 +10284,7 @@ pub unsafe fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b) } @@ -10114,8 +10296,8 @@ pub unsafe fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - vscalefph_256(a, b, src, k) +pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { vscalefph_256(a, b, src, k) } } /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store @@ -10126,7 +10308,7 @@ pub unsafe fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: _ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { +pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b) } @@ -10138,7 +10320,7 @@ pub unsafe fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b) } @@ -10150,7 +10332,7 @@ pub unsafe fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -10162,7 +10344,7 @@ pub unsafe fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vscalefph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { +pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b) } @@ -10183,7 +10365,7 @@ pub unsafe fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __ #[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_scalef_round_ph(a: __m512h, b: 
__m512h) -> __m512h { +pub fn _mm512_scalef_round_ph(a: __m512h, b: __m512h) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask_scalef_round_ph::(_mm512_undefined_ph(), 0xffffffff, a, b) } @@ -10205,14 +10387,16 @@ pub unsafe fn _mm512_scalef_round_ph(a: __m512h, b: __m512h #[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_scalef_round_ph( +pub fn _mm512_mask_scalef_round_ph( src: __m512h, k: __mmask32, a: __m512h, b: __m512h, ) -> __m512h { - static_assert_rounding!(ROUNDING); - vscalefph_512(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vscalefph_512(a, b, src, k, ROUNDING) + } } /// Scale the packed half-precision (16-bit) floating-point elements in a using values from b, and store @@ -10232,7 +10416,7 @@ pub unsafe fn _mm512_mask_scalef_round_ph( #[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_scalef_round_ph( +pub fn _mm512_maskz_scalef_round_ph( k: __mmask32, a: __m512h, b: __m512h, @@ -10250,7 +10434,7 @@ pub unsafe fn _mm512_maskz_scalef_round_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vscalefsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h { _mm_mask_scalef_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -10263,7 +10447,7 @@ pub unsafe fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vscalefsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b) } @@ -10276,7 +10460,7 @@ pub unsafe fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m12 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vscalefsh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { _mm_mask_scalef_sh(_mm_setzero_ph(), k, a, b) } @@ -10298,7 +10482,7 @@ pub unsafe fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128 #[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_scalef_round_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_scalef_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_scalef_round_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -10321,14 +10505,16 @@ pub unsafe fn _mm_scalef_round_sh(a: __m128h, b: __m128h) - #[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_scalef_round_sh( +pub fn _mm_mask_scalef_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vscalefsh(a, b, src, k, ROUNDING) + unsafe { + 
static_assert_rounding!(ROUNDING); + vscalefsh(a, b, src, k, ROUNDING) + } } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store @@ -10349,7 +10535,7 @@ pub unsafe fn _mm_mask_scalef_round_sh( #[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_scalef_round_sh( +pub fn _mm_maskz_scalef_round_sh( k: __mmask8, a: __m128h, b: __m128h, @@ -10375,7 +10561,7 @@ pub unsafe fn _mm_maskz_scalef_round_sh( #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_ph(a: __m128h) -> __m128h { +pub fn _mm_reduce_ph(a: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_ph::(_mm_undefined_ph(), 0xff, a) } @@ -10398,13 +10584,11 @@ pub unsafe fn _mm_reduce_ph(a: __m128h) -> __m128h { #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_reduce_ph( - src: __m128h, - k: __mmask8, - a: __m128h, -) -> __m128h { - static_assert_uimm_bits!(IMM8, 8); - vreduceph_128(a, IMM8, src, k) +pub fn _mm_mask_reduce_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vreduceph_128(a, IMM8, src, k) + } } /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the @@ -10425,7 +10609,7 @@ pub unsafe fn _mm_mask_reduce_ph( #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_reduce_ph(k: __mmask8, a: __m128h) -> __m128h { +pub fn _mm_maskz_reduce_ph(k: __mmask8, a: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_ph::(_mm_setzero_ph(), k, a) } @@ -10447,7 +10631,7 @@ pub unsafe fn _mm_maskz_reduce_ph(k: __mmask8, a: __m128h) -> _ #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_reduce_ph(a: __m256h) -> __m256h { +pub fn _mm256_reduce_ph(a: __m256h) -> __m256h { static_assert_uimm_bits!(IMM8, 8); _mm256_mask_reduce_ph::(_mm256_undefined_ph(), 0xffff, a) } @@ -10470,13 +10654,11 @@ pub unsafe fn _mm256_reduce_ph(a: __m256h) -> __m256h { #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_reduce_ph( - src: __m256h, - k: __mmask16, - a: __m256h, -) -> __m256h { - static_assert_uimm_bits!(IMM8, 8); - vreduceph_256(a, IMM8, src, k) +pub fn _mm256_mask_reduce_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vreduceph_256(a, IMM8, src, k) + } } /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the @@ -10497,7 +10679,7 @@ pub unsafe fn _mm256_mask_reduce_ph( #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_reduce_ph(k: __mmask16, a: __m256h) -> __m256h { +pub fn _mm256_maskz_reduce_ph(k: __mmask16, a: __m256h) -> __m256h { static_assert_uimm_bits!(IMM8, 8); 
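// --- Illustrative sketch (not part of the patch) ---------------------------
// Why: the surrounding hunks cover the `vreduceph` family, which extracts the
// "reduced argument" of each lane, i.e. what is left over after rounding the
// lane to the precision selected by IMM8. The sketch shows the merge-masked
// form. Assumptions: the same nightly feature and AVX-512 FP16 hardware as
// the sketch further above; the helper name `frac_part_masked` is made up
// for illustration.
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512fp16,avx512vl")]
fn frac_part_masked(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
    // IMM8 = _MM_FROUND_TO_ZERO: keep zero fraction bits and truncate, so
    // each selected lane becomes `a - trunc(a)`, its signed fractional part.
    // Lanes whose bit in `k` is clear are copied from `src` (merge masking).
    _mm_mask_reduce_ph::<{ _MM_FROUND_TO_ZERO }>(src, k, a)
}
// ---------------------------------------------------------------------------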
_mm256_mask_reduce_ph::(_mm256_setzero_ph(), k, a) } @@ -10519,7 +10701,7 @@ pub unsafe fn _mm256_maskz_reduce_ph(k: __mmask16, a: __m256h) #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_reduce_ph(a: __m512h) -> __m512h { +pub fn _mm512_reduce_ph(a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_ph::(_mm512_undefined_ph(), 0xffffffff, a) } @@ -10542,11 +10724,7 @@ pub unsafe fn _mm512_reduce_ph(a: __m512h) -> __m512h { #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_reduce_ph( - src: __m512h, - k: __mmask32, - a: __m512h, -) -> __m512h { +pub fn _mm512_mask_reduce_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_round_ph::(src, k, a) } @@ -10569,7 +10747,7 @@ pub unsafe fn _mm512_mask_reduce_ph( #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_reduce_ph(k: __mmask32, a: __m512h) -> __m512h { +pub fn _mm512_maskz_reduce_ph(k: __mmask32, a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); _mm512_mask_reduce_ph::(_mm512_setzero_ph(), k, a) } @@ -10593,7 +10771,7 @@ pub unsafe fn _mm512_maskz_reduce_ph(k: __mmask32, a: __m512h) #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(1, 2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_reduce_round_ph(a: __m512h) -> __m512h { +pub fn _mm512_reduce_round_ph(a: __m512h) -> __m512h { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm512_mask_reduce_round_ph::(_mm512_undefined_ph(), 0xffffffff, a) @@ -10619,14 +10797,16 @@ pub unsafe fn _mm512_reduce_round_ph(a: __m512h #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_reduce_round_ph( +pub fn _mm512_mask_reduce_round_ph( src: __m512h, k: __mmask32, a: __m512h, ) -> __m512h { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - vreduceph_512(a, IMM8, src, k, SAE) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + vreduceph_512(a, IMM8, src, k, SAE) + } } /// Extract the reduced argument of packed half-precision (16-bit) floating-point elements in a by the @@ -10649,7 +10829,7 @@ pub unsafe fn _mm512_mask_reduce_round_ph( #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_reduce_round_ph( +pub fn _mm512_maskz_reduce_round_ph( k: __mmask32, a: __m512h, ) -> __m512h { @@ -10676,7 +10856,7 @@ pub unsafe fn _mm512_maskz_reduce_round_ph( #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_sh(a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_reduce_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -10700,7 +10880,7 @@ pub unsafe fn _mm_reduce_sh(a: __m128h, b: __m128h) -> __m128h #[cfg_attr(test, 
assert_instr(vreducesh, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_reduce_sh( +pub fn _mm_mask_reduce_sh( src: __m128h, k: __mmask8, a: __m128h, @@ -10729,7 +10909,7 @@ pub unsafe fn _mm_mask_reduce_sh( #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_reduce_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { +pub fn _mm_maskz_reduce_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); _mm_mask_reduce_sh::(_mm_setzero_ph(), k, a, b) } @@ -10754,10 +10934,7 @@ pub unsafe fn _mm_maskz_reduce_sh(k: __mmask8, a: __m128h, b: _ #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_round_sh( - a: __m128h, - b: __m128h, -) -> __m128h { +pub fn _mm_reduce_round_sh(a: __m128h, b: __m128h) -> __m128h { static_assert_uimm_bits!(IMM8, 8); static_assert_sae!(SAE); _mm_mask_reduce_round_sh::(_mm_undefined_ph(), 0xff, a, b) @@ -10784,15 +10961,17 @@ pub unsafe fn _mm_reduce_round_sh( #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_reduce_round_sh( +pub fn _mm_mask_reduce_round_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128h, ) -> __m128h { - static_assert_uimm_bits!(IMM8, 8); - static_assert_sae!(SAE); - vreducesh(a, b, src, k, IMM8, SAE) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + static_assert_sae!(SAE); + vreducesh(a, b, src, k, IMM8, SAE) + } } /// Extract the reduced argument of the lower half-precision (16-bit) floating-point element in b by @@ -10816,7 +10995,7 @@ pub unsafe fn _mm_mask_reduce_round_sh( #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_reduce_round_sh( +pub fn _mm_maskz_reduce_round_sh( k: __mmask8, a: __m128h, b: __m128h, @@ -10833,12 +11012,14 @@ pub unsafe fn _mm_maskz_reduce_round_sh( #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_add_ph(a: __m128h) -> f16 { - let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); - let a = _mm_add_ph(a, b); - let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); - let a = _mm_add_ph(a, b); - simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1) +pub fn _mm_reduce_add_ph(a: __m128h) -> f16 { + unsafe { + let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); + let a = _mm_add_ph(a, b); + let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); + let a = _mm_add_ph(a, b); + simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. 
Returns the @@ -10848,10 +11029,12 @@ pub unsafe fn _mm_reduce_add_ph(a: __m128h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_reduce_add_ph(a: __m256h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - _mm_reduce_add_ph(_mm_add_ph(p, q)) +pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + _mm_reduce_add_ph(_mm_add_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by addition. Returns the @@ -10861,16 +11044,18 @@ pub unsafe fn _mm256_reduce_add_ph(a: __m256h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_reduce_add_ph(a: __m512h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - let q = simd_shuffle!( - a, - a, - [ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 - ] - ); - _mm256_reduce_add_ph(_mm256_add_ph(p, q)) +pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let q = simd_shuffle!( + a, + a, + [ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + ] + ); + _mm256_reduce_add_ph(_mm256_add_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns @@ -10880,12 +11065,14 @@ pub unsafe fn _mm512_reduce_add_ph(a: __m512h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_mul_ph(a: __m128h) -> f16 { - let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); - let a = _mm_mul_ph(a, b); - let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); - let a = _mm_mul_ph(a, b); - simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1) +pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 { + unsafe { + let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); + let a = _mm_mul_ph(a, b); + let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); + let a = _mm_mul_ph(a, b); + simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. Returns @@ -10895,10 +11082,12 @@ pub unsafe fn _mm_reduce_mul_ph(a: __m128h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - _mm_reduce_mul_ph(_mm_mul_ph(p, q)) +pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + _mm_reduce_mul_ph(_mm_mul_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by multiplication. 
Returns @@ -10909,15 +11098,17 @@ pub unsafe fn _mm256_reduce_mul_ph(a: __m256h) -> f16 { #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm512_reduce_mul_ph(a: __m512h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - let q = simd_shuffle!( - a, - a, - [ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 - ] - ); - _mm256_reduce_mul_ph(_mm256_mul_ph(p, q)) + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let q = simd_shuffle!( + a, + a, + [ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + ] + ); + _mm256_reduce_mul_ph(_mm256_mul_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the @@ -10927,13 +11118,15 @@ pub unsafe fn _mm512_reduce_mul_ph(a: __m512h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_min_ph(a: __m128h) -> f16 { - let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); - let a = _mm_min_ph(a, b); - let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); - let a = _mm_min_ph(a, b); - let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); - simd_extract!(_mm_min_sh(a, b), 0) +pub fn _mm_reduce_min_ph(a: __m128h) -> f16 { + unsafe { + let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); + let a = _mm_min_ph(a, b); + let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); + let a = _mm_min_ph(a, b); + let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); + simd_extract!(_mm_min_sh(a, b), 0) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the @@ -10943,10 +11136,12 @@ pub unsafe fn _mm_reduce_min_ph(a: __m128h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_reduce_min_ph(a: __m256h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - _mm_reduce_min_ph(_mm_min_ph(p, q)) +pub fn _mm256_reduce_min_ph(a: __m256h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + _mm_reduce_min_ph(_mm_min_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by minimum. Returns the @@ -10956,16 +11151,18 @@ pub unsafe fn _mm256_reduce_min_ph(a: __m256h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_reduce_min_ph(a: __m512h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - let q = simd_shuffle!( - a, - a, - [ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 - ] - ); - _mm256_reduce_min_ph(_mm256_min_ph(p, q)) +pub fn _mm512_reduce_min_ph(a: __m512h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let q = simd_shuffle!( + a, + a, + [ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + ] + ); + _mm256_reduce_min_ph(_mm256_min_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. 
Returns the @@ -10975,13 +11172,15 @@ pub unsafe fn _mm512_reduce_min_ph(a: __m512h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_reduce_max_ph(a: __m128h) -> f16 { - let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); - let a = _mm_max_ph(a, b); - let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); - let a = _mm_max_ph(a, b); - let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); - simd_extract!(_mm_max_sh(a, b), 0) +pub fn _mm_reduce_max_ph(a: __m128h) -> f16 { + unsafe { + let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]); + let a = _mm_max_ph(a, b); + let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); + let a = _mm_max_ph(a, b); + let b = simd_shuffle!(a, a, [1, 0, 2, 3, 4, 5, 6, 7]); + simd_extract!(_mm_max_sh(a, b), 0) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the @@ -10991,10 +11190,12 @@ pub unsafe fn _mm_reduce_max_ph(a: __m128h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_reduce_max_ph(a: __m256h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - _mm_reduce_max_ph(_mm_max_ph(p, q)) +pub fn _mm256_reduce_max_ph(a: __m256h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + _mm_reduce_max_ph(_mm_max_ph(p, q)) + } } /// Reduce the packed half-precision (16-bit) floating-point elements in a by maximum. Returns the @@ -11004,16 +11205,18 @@ pub unsafe fn _mm256_reduce_max_ph(a: __m256h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_reduce_max_ph(a: __m512h) -> f16 { - let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - let q = simd_shuffle!( - a, - a, - [ - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 - ] - ); - _mm256_reduce_max_ph(_mm256_max_ph(p, q)) +pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 { + unsafe { + let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + let q = simd_shuffle!( + a, + a, + [ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 + ] + ); + _mm256_reduce_max_ph(_mm256_max_ph(p, q)) + } } macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics @@ -11061,9 +11264,11 @@ macro_rules! 
fpclass_asm { // FIXME: use LLVM intrinsics #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fpclass_ph_mask(a: __m128h) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask8, xmm_reg, a) +pub fn _mm_fpclass_ph_mask(a: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask8, xmm_reg, a) + } } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11086,9 +11291,11 @@ pub unsafe fn _mm_fpclass_ph_mask(a: __m128h) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fpclass_ph_mask(k1: __mmask8, a: __m128h) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask8, k1, xmm_reg, a) +pub fn _mm_mask_fpclass_ph_mask(k1: __mmask8, a: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask8, k1, xmm_reg, a) + } } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11110,9 +11317,11 @@ pub unsafe fn _mm_mask_fpclass_ph_mask(k1: __mmask8, a: __m128h #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_fpclass_ph_mask(a: __m256h) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask16, ymm_reg, a) +pub fn _mm256_fpclass_ph_mask(a: __m256h) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask16, ymm_reg, a) + } } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11135,9 +11344,11 @@ pub unsafe fn _mm256_fpclass_ph_mask(a: __m256h) -> __mmask16 { #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_fpclass_ph_mask(k1: __mmask16, a: __m256h) -> __mmask16 { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask16, k1, ymm_reg, a) +pub fn _mm256_mask_fpclass_ph_mask(k1: __mmask16, a: __m256h) -> __mmask16 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask16, k1, ymm_reg, a) + } } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11159,9 +11370,11 @@ pub unsafe fn _mm256_mask_fpclass_ph_mask(k1: __mmask16, a: __m #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_fpclass_ph_mask(a: __m512h) -> __mmask32 { - static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask32, zmm_reg, a) +pub fn _mm512_fpclass_ph_mask(a: __m512h) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask32, zmm_reg, a) + } } /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified @@ -11184,9 +11397,11 @@ pub unsafe fn _mm512_fpclass_ph_mask(a: __m512h) -> __mmask32 { #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_fpclass_ph_mask(k1: __mmask32, a: __m512h) -> __mmask32 { - 
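// Illustrative sketch for the fpclass family: the category set is passed as
// the IMM8 const generic, one bit per category. The bit values below follow
// the usual vfpclass encoding (0x01 = QNaN, 0x80 = SNaN) and are an
// assumption worth checking against the Intel reference.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn nan_lanes(v: __m128h) -> __mmask8 {
    // One mask bit per lane, set where that lane is a quiet or signaling NaN.
    _mm_fpclass_ph_mask::<0x81>(v)
}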
static_assert_uimm_bits!(IMM8, 8); - fpclass_asm!(__mmask32, k1, zmm_reg, a) +pub fn _mm512_mask_fpclass_ph_mask(k1: __mmask32, a: __m512h) -> __mmask32 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + fpclass_asm!(__mmask32, k1, zmm_reg, a) + } } /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified @@ -11208,7 +11423,7 @@ pub unsafe fn _mm512_mask_fpclass_ph_mask(k1: __mmask32, a: __m #[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_fpclass_sh_mask(a: __m128h) -> __mmask8 { +pub fn _mm_fpclass_sh_mask(a: __m128h) -> __mmask8 { _mm_mask_fpclass_sh_mask::(0xff, a) } @@ -11232,9 +11447,11 @@ pub unsafe fn _mm_fpclass_sh_mask(a: __m128h) -> __mmask8 { #[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_fpclass_sh_mask(k1: __mmask8, a: __m128h) -> __mmask8 { - static_assert_uimm_bits!(IMM8, 8); - vfpclasssh(a, IMM8, k1) +pub fn _mm_mask_fpclass_sh_mask(k1: __mmask8, a: __m128h) -> __mmask8 { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + vfpclasssh(a, IMM8, k1) + } } /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, @@ -11244,8 +11461,8 @@ pub unsafe fn _mm_mask_fpclass_sh_mask(k1: __mmask8, a: __m128h #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { - simd_select_bitmask(k, b, a) +pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h { + unsafe { simd_select_bitmask(k, b, a) } } /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, @@ -11255,8 +11472,8 @@ pub unsafe fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { - simd_select_bitmask(k, b, a) +pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h { + unsafe { simd_select_bitmask(k, b, a) } } /// Blend packed half-precision (16-bit) floating-point elements from a and b using control mask k, @@ -11266,8 +11483,8 @@ pub unsafe fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m2 #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { - simd_select_bitmask(k, b, a) +pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h { + unsafe { simd_select_bitmask(k, b, a) } } /// Shuffle half-precision (16-bit) floating-point elements in a and b using the corresponding selector @@ -11277,7 +11494,7 @@ pub unsafe fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m5 #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h { +pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h { _mm_castsi128_ph(_mm_permutex2var_epi16( _mm_castph_si128(a), idx, @@ -11292,7 
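// Illustrative sketch of the blend semantics spelled out by the
// simd_select_bitmask(k, b, a) bodies above: lane i of the result is b[i]
// when bit i of k is set, otherwise a[i]. Names are illustrative.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn take_low_half_from_b(a: __m128h, b: __m128h) -> __m128h {
    // 0b0000_1111: lanes 0..=3 come from b, lanes 4..=7 from a.
    _mm_mask_blend_ph(0b0000_1111, a, b)
}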
+11509,7 @@ pub unsafe fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m12 #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h { +pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h { _mm256_castsi256_ph(_mm256_permutex2var_epi16( _mm256_castph_si256(a), idx, @@ -11307,7 +11524,7 @@ pub unsafe fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __ #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h { +pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h { _mm512_castsi512_ph(_mm512_permutex2var_epi16( _mm512_castph_si512(a), idx, @@ -11322,7 +11539,7 @@ pub unsafe fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __ #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { +pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a))) } @@ -11333,7 +11550,7 @@ pub unsafe fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h { #[inline] #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { +pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a))) } @@ -11344,7 +11561,7 @@ pub unsafe fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { +pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a))) } @@ -11356,8 +11573,8 @@ pub unsafe fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { - vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { + unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11369,8 +11586,8 @@ pub unsafe fn _mm_cvtepi16_ph(a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { - simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) +pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11381,7 +11598,7 @@ pub unsafe fn _mm_mask_cvtepi16_ph(src: __m128h, k: 
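// Illustrative sketch: _mm_permutexvar_ph gathers lanes of `a` using the
// 16-bit indices in `idx` (it forwards to _mm_permutexvar_epi16 on the raw
// bit pattern, so for the 128-bit form only the low 3 bits of each index
// matter).
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn reverse_lanes(a: __m128h) -> __m128h {
    // idx lane i holds 7 - i, so result lane i is a[7 - i].
    let idx = _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0);
    _mm_permutexvar_ph(idx, a)
}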
__mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { +pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a) } @@ -11393,8 +11610,8 @@ pub unsafe fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { - vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { + unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11406,8 +11623,8 @@ pub unsafe fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { - simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) +pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11418,7 +11635,7 @@ pub unsafe fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { +pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a) } @@ -11430,8 +11647,8 @@ pub unsafe fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { - vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { + unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11443,8 +11660,8 @@ pub unsafe fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { - simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) +pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11455,7 +11672,7 @@ pub unsafe fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = 
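// Illustrative sketch of the two masking flavours used throughout this file:
// merge-masking keeps the lane from `src` when the mask bit is clear, while
// zero-masking sets it to zero.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn convert_even_lanes(src: __m128h, a: __m128i) -> (__m128h, __m128h) {
    let k: __mmask8 = 0b0101_0101; // convert lanes 0, 2, 4 and 6 only
    let merged = _mm_mask_cvtepi16_ph(src, k, a);
    let zeroed = _mm_maskz_cvtepi16_ph(k, a);
    (merged, zeroed)
}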
"127213")] -pub unsafe fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { +pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a) } @@ -11476,9 +11693,11 @@ pub unsafe fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h { #[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundepi16_ph(a: __m512i) -> __m512h { - static_assert_rounding!(ROUNDING); - vcvtw2ph_512(a.as_i16x32(), ROUNDING) +pub fn _mm512_cvt_roundepi16_ph(a: __m512i) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtw2ph_512(a.as_i16x32(), ROUNDING) + } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11499,13 +11718,15 @@ pub unsafe fn _mm512_cvt_roundepi16_ph(a: __m512i) -> __m51 #[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundepi16_ph( +pub fn _mm512_mask_cvt_roundepi16_ph( src: __m512h, k: __mmask32, a: __m512i, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepi16_ph::(a), src) + } } /// Convert packed signed 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11525,10 +11746,7 @@ pub unsafe fn _mm512_mask_cvt_roundepi16_ph( #[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundepi16_ph( - k: __mmask32, - a: __m512i, -) -> __m512h { +pub fn _mm512_maskz_cvt_roundepi16_ph(k: __mmask32, a: __m512i) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundepi16_ph::(_mm512_setzero_ph(), k, a) } @@ -11541,8 +11759,8 @@ pub unsafe fn _mm512_maskz_cvt_roundepi16_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { - vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { + unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11554,8 +11772,8 @@ pub unsafe fn _mm_cvtepu16_ph(a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { - simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) +pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11566,7 +11784,7 @@ pub unsafe fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn 
_mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { +pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a) } @@ -11578,8 +11796,8 @@ pub unsafe fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { - vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { + unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11591,8 +11809,8 @@ pub unsafe fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { - simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) +pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11603,7 +11821,7 @@ pub unsafe fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { +pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a) } @@ -11615,8 +11833,8 @@ pub unsafe fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { - vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { + unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11628,8 +11846,8 @@ pub unsafe fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { - simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) +pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h { + unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11640,7 +11858,7 @@ pub unsafe fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtuw2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { +pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { 
_mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a) } @@ -11661,9 +11879,11 @@ pub unsafe fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h { #[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundepu16_ph(a: __m512i) -> __m512h { - static_assert_rounding!(ROUNDING); - vcvtuw2ph_512(a.as_u16x32(), ROUNDING) +pub fn _mm512_cvt_roundepu16_ph(a: __m512i) -> __m512h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtuw2ph_512(a.as_u16x32(), ROUNDING) + } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11684,13 +11904,15 @@ pub unsafe fn _mm512_cvt_roundepu16_ph(a: __m512i) -> __m51 #[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundepu16_ph( +pub fn _mm512_mask_cvt_roundepu16_ph( src: __m512h, k: __mmask32, a: __m512i, ) -> __m512h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepu16_ph::(a), src) + } } /// Convert packed unsigned 16-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11710,10 +11932,7 @@ pub unsafe fn _mm512_mask_cvt_roundepu16_ph( #[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundepu16_ph( - k: __mmask32, - a: __m512i, -) -> __m512h { +pub fn _mm512_maskz_cvt_roundepu16_ph(k: __mmask32, a: __m512i) -> __m512h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundepu16_ph::(_mm512_setzero_ph(), k, a) } @@ -11726,7 +11945,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu16_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { +pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a) } @@ -11739,8 +11958,8 @@ pub unsafe fn _mm_cvtepi32_ph(a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { - vcvtdq2ph_128(a.as_i32x4(), src, k) +pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11752,7 +11971,7 @@ pub unsafe fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { +pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a) } @@ -11764,8 +11983,8 @@ pub unsafe fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] 
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { - vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { + unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11777,8 +11996,8 @@ pub unsafe fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { - simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) +pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11789,7 +12008,7 @@ pub unsafe fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { +pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a) } @@ -11801,8 +12020,8 @@ pub unsafe fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { - vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { + unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11814,8 +12033,8 @@ pub unsafe fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { - simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) +pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11826,7 +12045,7 @@ pub unsafe fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtdq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { +pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { _mm512_mask_cvtepi32_ph(_mm256_setzero_ph(), k, a) } @@ -11847,9 +12066,11 @@ pub unsafe fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h { #[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundepi32_ph(a: __m512i) -> __m256h { - 
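// Illustrative sketch of the narrowing shapes in the 32-bit conversions:
// sixteen 32-bit integers in a __m512i become sixteen f16 lanes, so the
// result is the half-width __m256h (and the 256-bit source form returns a
// __m128h).
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
fn i32x16_to_f16x16(a: __m512i) -> __m256h {
    _mm512_cvtepi32_ph(a)
}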
static_assert_rounding!(ROUNDING); - vcvtdq2ph_512(a.as_i32x16(), ROUNDING) +pub fn _mm512_cvt_roundepi32_ph(a: __m512i) -> __m256h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtdq2ph_512(a.as_i32x16(), ROUNDING) + } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11870,13 +12091,15 @@ pub unsafe fn _mm512_cvt_roundepi32_ph(a: __m512i) -> __m25 #[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundepi32_ph( +pub fn _mm512_mask_cvt_roundepi32_ph( src: __m256h, k: __mmask16, a: __m512i, ) -> __m256h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepi32_ph::(a), src) + } } /// Convert packed signed 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11896,10 +12119,7 @@ pub unsafe fn _mm512_mask_cvt_roundepi32_ph( #[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundepi32_ph( - k: __mmask16, - a: __m512i, -) -> __m256h { +pub fn _mm512_maskz_cvt_roundepi32_ph(k: __mmask16, a: __m512i) -> __m256h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundepi32_ph::(_mm256_setzero_ph(), k, a) } @@ -11913,8 +12133,8 @@ pub unsafe fn _mm512_maskz_cvt_roundepi32_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsi2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h { - vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h { + unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } } /// Convert the signed 32-bit integer b to a half-precision (16-bit) floating-point element, store the @@ -11935,9 +12155,11 @@ pub unsafe fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h { #[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundi32_sh(a: __m128h, b: i32) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtsi2sh(a, b, ROUNDING) +pub fn _mm_cvt_roundi32_sh(a: __m128h, b: i32) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsi2sh(a, b, ROUNDING) + } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11948,7 +12170,7 @@ pub unsafe fn _mm_cvt_roundi32_sh(a: __m128h, b: i32) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtepu32_ph(a: __m128i) -> __m128h { +pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h { _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a) } @@ -11961,8 +12183,8 @@ pub unsafe fn _mm_cvtepu32_ph(a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { - vcvtudq2ph_128(a.as_u32x4(), src, k) +pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe 
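// Illustrative sketch: the scalar form converts one i32 into the low f16 lane
// of the result and copies the remaining seven lanes from `a`.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
fn set_low_lane(a: __m128h, x: i32) -> __m128h {
    _mm_cvti32_sh(a, x)
}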
{ vcvtudq2ph_128(a.as_u32x4(), src, k) } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11974,7 +12196,7 @@ pub unsafe fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { +pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a) } @@ -11986,8 +12208,8 @@ pub unsafe fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { - vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { + unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -11999,8 +12221,8 @@ pub unsafe fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { - simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) +pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12011,7 +12233,7 @@ pub unsafe fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { +pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a) } @@ -12023,8 +12245,8 @@ pub unsafe fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { - vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { + unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12036,8 +12258,8 @@ pub unsafe fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { - simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) +pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h { + unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, 
@@ -12048,7 +12270,7 @@ pub unsafe fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtudq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { +pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { _mm512_mask_cvtepu32_ph(_mm256_setzero_ph(), k, a) } @@ -12069,9 +12291,11 @@ pub unsafe fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h { #[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundepu32_ph(a: __m512i) -> __m256h { - static_assert_rounding!(ROUNDING); - vcvtudq2ph_512(a.as_u32x16(), ROUNDING) +pub fn _mm512_cvt_roundepu32_ph(a: __m512i) -> __m256h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtudq2ph_512(a.as_u32x16(), ROUNDING) + } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12092,13 +12316,15 @@ pub unsafe fn _mm512_cvt_roundepu32_ph(a: __m512i) -> __m25 #[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundepu32_ph( +pub fn _mm512_mask_cvt_roundepu32_ph( src: __m256h, k: __mmask16, a: __m512i, ) -> __m256h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepu32_ph::(a), src) + } } /// Convert packed unsigned 32-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12118,10 +12344,7 @@ pub unsafe fn _mm512_mask_cvt_roundepu32_ph( #[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundepu32_ph( - k: __mmask16, - a: __m512i, -) -> __m256h { +pub fn _mm512_maskz_cvt_roundepu32_ph(k: __mmask16, a: __m512i) -> __m256h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundepu32_ph::(_mm256_setzero_ph(), k, a) } @@ -12135,8 +12358,8 @@ pub unsafe fn _mm512_maskz_cvt_roundepu32_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtusi2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h { - vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h { + unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) } } /// Convert the unsigned 32-bit integer b to a half-precision (16-bit) floating-point element, store the @@ -12157,9 +12380,11 @@ pub unsafe fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h { #[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundu32_sh(a: __m128h, b: u32) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtusi2sh(a, b, ROUNDING) +pub fn _mm_cvt_roundu32_sh(a: __m128h, b: u32) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtusi2sh(a, b, ROUNDING) + } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12170,7 +12395,7 @@ pub unsafe fn 
_mm_cvt_roundu32_sh(a: __m128h, b: u32) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtepi64_ph(a: __m128i) -> __m128h { +pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h { _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a) } @@ -12183,8 +12408,8 @@ pub unsafe fn _mm_cvtepi64_ph(a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { - vcvtqq2ph_128(a.as_i64x2(), src, k) +pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12196,7 +12421,7 @@ pub unsafe fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { +pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) } @@ -12208,7 +12433,7 @@ pub unsafe fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { +pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a) } @@ -12221,8 +12446,8 @@ pub unsafe fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { - vcvtqq2ph_256(a.as_i64x4(), src, k) +pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12234,7 +12459,7 @@ pub unsafe fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { +pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) } @@ -12246,8 +12471,8 @@ pub unsafe fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h { - vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h { + unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12259,8 +12484,8 @@ pub unsafe fn 
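// Illustrative sketch (lane layout per the Intel description, so treat it as
// an assumption): the 128-bit i64 -> f16 conversion produces only two f16
// results in the low lanes; the remaining lanes of the __m128h come out
// zeroed.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn i64x2_to_f16() -> __m128h {
    let a = _mm_set_epi64x(-3, 40_000); // lane 1 = -3, lane 0 = 40_000
    _mm_cvtepi64_ph(a)
}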
_mm512_cvtepi64_ph(a: __m512i) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { - simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) +pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12271,7 +12496,7 @@ pub unsafe fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { +pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { _mm512_mask_cvtepi64_ph(_mm_setzero_ph(), k, a) } @@ -12292,9 +12517,11 @@ pub unsafe fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h { #[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundepi64_ph(a: __m512i) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtqq2ph_512(a.as_i64x8(), ROUNDING) +pub fn _mm512_cvt_roundepi64_ph(a: __m512i) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtqq2ph_512(a.as_i64x8(), ROUNDING) + } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12315,13 +12542,15 @@ pub unsafe fn _mm512_cvt_roundepi64_ph(a: __m512i) -> __m12 #[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundepi64_ph( +pub fn _mm512_mask_cvt_roundepi64_ph( src: __m128h, k: __mmask8, a: __m512i, ) -> __m128h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepi64_ph::(a), src) + } } /// Convert packed signed 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12341,10 +12570,7 @@ pub unsafe fn _mm512_mask_cvt_roundepi64_ph( #[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundepi64_ph( - k: __mmask8, - a: __m512i, -) -> __m128h { +pub fn _mm512_maskz_cvt_roundepi64_ph(k: __mmask8, a: __m512i) -> __m128h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundepi64_ph::(_mm_setzero_ph(), k, a) } @@ -12357,7 +12583,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepi64_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtepu64_ph(a: __m128i) -> __m128h { +pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h { _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a) } @@ -12370,8 +12596,8 @@ pub unsafe fn _mm_cvtepu64_ph(a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn 
_mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { - vcvtuqq2ph_128(a.as_u64x2(), src, k) +pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h { + unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) } } /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12383,7 +12609,7 @@ pub unsafe fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { +pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) } @@ -12395,7 +12621,7 @@ pub unsafe fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { +pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a) } @@ -12408,8 +12634,8 @@ pub unsafe fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { - vcvtuqq2ph_256(a.as_u64x4(), src, k) +pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h { + unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) } } /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12421,7 +12647,7 @@ pub unsafe fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { +pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) } @@ -12433,8 +12659,8 @@ pub unsafe fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { - vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { + unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) } } /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12446,8 +12672,8 @@ pub unsafe fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { - simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) +pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h { + unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) } } /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12458,7 
+12684,7 @@ pub unsafe fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtuqq2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { +pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { _mm512_mask_cvtepu64_ph(_mm_setzero_ph(), k, a) } @@ -12479,9 +12705,11 @@ pub unsafe fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h { #[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundepu64_ph(a: __m512i) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtuqq2ph_512(a.as_u64x8(), ROUNDING) +pub fn _mm512_cvt_roundepu64_ph(a: __m512i) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtuqq2ph_512(a.as_u64x8(), ROUNDING) + } } /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12502,13 +12730,15 @@ pub unsafe fn _mm512_cvt_roundepu64_ph(a: __m512i) -> __m12 #[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundepu64_ph( +pub fn _mm512_mask_cvt_roundepu64_ph( src: __m128h, k: __mmask8, a: __m512i, ) -> __m128h { - static_assert_rounding!(ROUNDING); - simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::(a), src) + unsafe { + static_assert_rounding!(ROUNDING); + simd_select_bitmask(k, _mm512_cvt_roundepu64_ph::(a), src) + } } /// Convert packed unsigned 64-bit integers in a to packed half-precision (16-bit) floating-point elements, @@ -12528,10 +12758,7 @@ pub unsafe fn _mm512_mask_cvt_roundepu64_ph( #[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundepu64_ph( - k: __mmask8, - a: __m512i, -) -> __m128h { +pub fn _mm512_maskz_cvt_roundepu64_ph(k: __mmask8, a: __m512i) -> __m128h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundepu64_ph::(_mm_setzero_ph(), k, a) } @@ -12544,7 +12771,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu64_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtxps_ph(a: __m128) -> __m128h { +pub fn _mm_cvtxps_ph(a: __m128) -> __m128h { _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a) } @@ -12557,8 +12784,8 @@ pub unsafe fn _mm_cvtxps_ph(a: __m128) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h { - vcvtps2phx_128(a, src, k) +pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h { + unsafe { vcvtps2phx_128(a, src, k) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12570,7 +12797,7 @@ pub unsafe fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn 
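// Illustrative sketch: packed f32 -> f16 via vcvtps2phx. The four results
// land in the low half of the __m128h (the upper lanes are zeroed, per the
// Intel description; an assumption worth double-checking). 65504.0 is the
// largest finite f16 value.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16,avx512vl")]
fn f32x4_to_f16() -> __m128h {
    let a = _mm_setr_ps(1.0, 2.5, -3.0, 65504.0);
    _mm_cvtxps_ph(a)
}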
_mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { +pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a) } @@ -12582,7 +12809,7 @@ pub unsafe fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtxps_ph(a: __m256) -> __m128h { +pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h { _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a) } @@ -12595,8 +12822,8 @@ pub unsafe fn _mm256_cvtxps_ph(a: __m256) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h { - vcvtps2phx_256(a, src, k) +pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h { + unsafe { vcvtps2phx_256(a, src, k) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12608,7 +12835,7 @@ pub unsafe fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { +pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a) } @@ -12620,7 +12847,7 @@ pub unsafe fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtxps_ph(a: __m512) -> __m256h { +pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h { _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), 0xffff, a) } @@ -12633,8 +12860,8 @@ pub unsafe fn _mm512_cvtxps_ph(a: __m512) -> __m256h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h { - vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h { + unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12646,7 +12873,7 @@ pub unsafe fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtps2phx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { +pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { _mm512_mask_cvtxps_ph(_mm256_setzero_ph(), k, a) } @@ -12667,7 +12894,7 @@ pub unsafe fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h { #[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtx_roundps_ph(a: __m512) -> __m256h { +pub fn _mm512_cvtx_roundps_ph(a: __m512) -> __m256h { static_assert_rounding!(ROUNDING); 
_mm512_mask_cvtx_roundps_ph::(_mm256_setzero_ph(), 0xffff, a) } @@ -12690,13 +12917,15 @@ pub unsafe fn _mm512_cvtx_roundps_ph(a: __m512) -> __m256h #[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtx_roundps_ph( +pub fn _mm512_mask_cvtx_roundps_ph( src: __m256h, k: __mmask16, a: __m512, ) -> __m256h { - static_assert_rounding!(ROUNDING); - vcvtps2phx_512(a, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vcvtps2phx_512(a, src, k, ROUNDING) + } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12717,10 +12946,7 @@ pub unsafe fn _mm512_mask_cvtx_roundps_ph( #[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtx_roundps_ph( - k: __mmask16, - a: __m512, -) -> __m256h { +pub fn _mm512_maskz_cvtx_roundps_ph(k: __mmask16, a: __m512) -> __m256h { static_assert_rounding!(ROUNDING); _mm512_mask_cvtx_roundps_ph::(_mm256_setzero_ph(), k, a) } @@ -12734,7 +12960,7 @@ pub unsafe fn _mm512_maskz_cvtx_roundps_ph( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtss2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { +pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { _mm_mask_cvtss_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -12748,8 +12974,8 @@ pub unsafe fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtss2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h { - vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h { + unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) @@ -12762,7 +12988,7 @@ pub unsafe fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtss2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h { +pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h { _mm_mask_cvtss_sh(_mm_setzero_ph(), k, a, b) } @@ -12784,7 +13010,7 @@ pub unsafe fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h #[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundss_sh(a: __m128h, b: __m128) -> __m128h { +pub fn _mm_cvt_roundss_sh(a: __m128h, b: __m128) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_cvt_roundss_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -12808,14 +13034,16 @@ pub unsafe fn _mm_cvt_roundss_sh(a: __m128h, b: __m128) -> #[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvt_roundss_sh( +pub fn _mm_mask_cvt_roundss_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128, ) 
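// Illustrative sketch: the scalar f32 -> f16 conversion with an explicit
// rounding override (here truncation toward zero, suppressing exceptions);
// lanes 1..=7 of the result are copied from `a`.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512fp16")]
fn f32_to_f16_truncating(a: __m128h, b: __m128) -> __m128h {
    _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
}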
-> __m128h { - static_assert_rounding!(ROUNDING); - vcvtss2sh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vcvtss2sh(a, b, src, k, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in b to a half-precision (16-bit) @@ -12837,7 +13065,7 @@ pub unsafe fn _mm_mask_cvt_roundss_sh( #[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvt_roundss_sh( +pub fn _mm_maskz_cvt_roundss_sh( k: __mmask8, a: __m128h, b: __m128, @@ -12854,7 +13082,7 @@ pub unsafe fn _mm_maskz_cvt_roundss_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtpd_ph(a: __m128d) -> __m128h { +pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h { _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) } @@ -12867,8 +13095,8 @@ pub unsafe fn _mm_cvtpd_ph(a: __m128d) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h { - vcvtpd2ph_128(a, src, k) +pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h { + unsafe { vcvtpd2ph_128(a, src, k) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12880,7 +13108,7 @@ pub unsafe fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { +pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a) } @@ -12892,7 +13120,7 @@ pub unsafe fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { +pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) } @@ -12905,8 +13133,8 @@ pub unsafe fn _mm256_cvtpd_ph(a: __m256d) -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h { - vcvtpd2ph_256(a, src, k) +pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h { + unsafe { vcvtpd2ph_256(a, src, k) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12918,7 +13146,7 @@ pub unsafe fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { +pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a) } @@ -12930,7 +13158,7 @@ pub unsafe fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h { 
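// Editorial note: the `ROUNDING = 8` that keeps appearing in the assert_instr
// attributes is the round-to-nearest encoding with exceptions suppressed. The
// rounding const generic is built from the stable `_MM_FROUND_*` constants; a
// quick x86_64-only check of the encodings (which exact set
// `static_assert_rounding!` accepts is stated here as an assumption):
use core::arch::x86_64::{
    _MM_FROUND_CUR_DIRECTION, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT,
    _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_ZERO,
};

fn main() {
    assert_eq!(_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, 8); // ROUNDING = 8
    assert_eq!(_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC, 9);
    assert_eq!(_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC, 10);
    assert_eq!(_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, 11);
    assert_eq!(_MM_FROUND_CUR_DIRECTION, 4); // "round the way MXCSR is currently set"
}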
#[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { +pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { _mm512_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a) } @@ -12943,8 +13171,8 @@ pub unsafe fn _mm512_cvtpd_ph(a: __m512d) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h { - vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h { + unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) @@ -12956,7 +13184,7 @@ pub unsafe fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtpd2ph))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h { +pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h { _mm512_mask_cvtpd_ph(_mm_setzero_ph(), k, a) } @@ -12977,7 +13205,7 @@ pub unsafe fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h { #[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundpd_ph(a: __m512d) -> __m128h { +pub fn _mm512_cvt_roundpd_ph(a: __m512d) -> __m128h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundpd_ph::(_mm_setzero_ph(), 0xff, a) } @@ -13000,13 +13228,15 @@ pub unsafe fn _mm512_cvt_roundpd_ph(a: __m512d) -> __m128h #[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundpd_ph( +pub fn _mm512_mask_cvt_roundpd_ph( src: __m128h, k: __mmask8, a: __m512d, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtpd2ph_512(a, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vcvtpd2ph_512(a, src, k, ROUNDING) + } } /// Convert packed double-precision (64-bit) floating-point elements in a to packed half-precision (16-bit) @@ -13027,7 +13257,7 @@ pub unsafe fn _mm512_mask_cvt_roundpd_ph( #[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundpd_ph(k: __mmask8, a: __m512d) -> __m128h { +pub fn _mm512_maskz_cvt_roundpd_ph(k: __mmask8, a: __m512d) -> __m128h { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundpd_ph::(_mm_setzero_ph(), k, a) } @@ -13041,7 +13271,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_ph(k: __mmask8, a: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsd2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { +pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { _mm_mask_cvtsd_sh(_mm_undefined_ph(), 0xff, a, b) } @@ -13055,8 +13285,8 @@ pub unsafe fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsd2sh))] 
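// Editorial note: unlike the half-to-integer conversions, whose plain variants
// delegate with an `_mm*_undefined_*` source, the pd -> ph family above
// delegates with `_mm_setzero_ph()`. For the 128- and 256-bit sources only the
// low 2 or 4 half-precision lanes of the 128-bit result are produced by the
// conversion and the remaining lanes are documented to be zero, so a zeroed
// source matches the intended result shape. Scalar model of the 128-bit form,
// with `f32` standing in for `f16`:
fn cvtpd_ph_model(a: [f64; 2]) -> [f32; 8] {
    let mut out = [0.0f32; 8]; // upper six lanes stay zero
    for (i, &x) in a.iter().enumerate() {
        out[i] = x as f32; // per-lane narrowing conversion
    }
    out
}

fn main() {
    assert_eq!(
        cvtpd_ph_model([1.5, -2.0]),
        [1.5, -2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    );
}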
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h { - vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h { + unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) @@ -13069,7 +13299,7 @@ pub unsafe fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsd2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h { +pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h { _mm_mask_cvtsd_sh(_mm_setzero_ph(), k, a, b) } @@ -13091,7 +13321,7 @@ pub unsafe fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h #[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsd_sh(a: __m128h, b: __m128d) -> __m128h { +pub fn _mm_cvt_roundsd_sh(a: __m128h, b: __m128d) -> __m128h { static_assert_rounding!(ROUNDING); _mm_mask_cvt_roundsd_sh::(_mm_undefined_ph(), 0xff, a, b) } @@ -13115,14 +13345,16 @@ pub unsafe fn _mm_cvt_roundsd_sh(a: __m128h, b: __m128d) -> #[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvt_roundsd_sh( +pub fn _mm_mask_cvt_roundsd_sh( src: __m128h, k: __mmask8, a: __m128h, b: __m128d, ) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtsd2sh(a, b, src, k, ROUNDING) + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsd2sh(a, b, src, k, ROUNDING) + } } /// Convert the lower double-precision (64-bit) floating-point element in b to a half-precision (16-bit) @@ -13144,7 +13376,7 @@ pub unsafe fn _mm_mask_cvt_roundsd_sh( #[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvt_roundsd_sh( +pub fn _mm_maskz_cvt_roundsd_sh( k: __mmask8, a: __m128h, b: __m128d, @@ -13161,7 +13393,7 @@ pub unsafe fn _mm_maskz_cvt_roundsd_sh( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_epi16(a: __m128h) -> __m128i { +pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i { _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a) } @@ -13174,8 +13406,8 @@ pub unsafe fn _mm_cvtph_epi16(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvtph2w_128(a, src.as_i16x8(), k)) +pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and @@ -13186,7 +13418,7 @@ pub unsafe fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = 
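// Editorial note: the scalar converts touched above (`_mm_cvtss_sh`,
// `_mm_cvtsd_sh` and their *_round_* forms) only produce lane 0: the low
// element of `b` is converted and the upper seven half-precision lanes are
// copied from `a`. Scalar model with `f32` standing in for `f16`:
fn cvtsd_sh_model(a: [f32; 8], b: [f64; 2]) -> [f32; 8] {
    let mut dst = a; // upper seven lanes come from `a`
    dst[0] = b[0] as f32; // only the low lane of `b` is converted
    dst
}

fn main() {
    let dst = cvtsd_sh_model([9.0; 8], [0.25, 123.0]);
    assert_eq!(dst[0], 0.25);
    assert_eq!(dst[1..], [9.0f32; 7]);
}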
"avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a) } @@ -13198,7 +13430,7 @@ pub unsafe fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { +pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a) } @@ -13211,8 +13443,8 @@ pub unsafe fn _mm256_cvtph_epi16(a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { - transmute(vcvtph2w_256(a, src.as_i16x16(), k)) +pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and @@ -13223,7 +13455,7 @@ pub unsafe fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { +pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a) } @@ -13235,7 +13467,7 @@ pub unsafe fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { +pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13248,13 +13480,15 @@ pub unsafe fn _mm512_cvtph_epi16(a: __m512h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { - transmute(vcvtph2w_512( - a, - src.as_i16x32(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvtph2w_512( + a, + src.as_i16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and @@ -13265,7 +13499,7 @@ pub unsafe fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { +pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a) } @@ -13286,7 +13520,7 @@ pub unsafe fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i { 
#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_epi16(a: __m512h) -> __m512i { +pub fn _mm512_cvt_roundph_epi16(a: __m512h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epi16::(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13309,13 +13543,15 @@ pub unsafe fn _mm512_cvt_roundph_epi16(a: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_epi16( +pub fn _mm512_mask_cvt_roundph_epi16( src: __m512i, k: __mmask32, a: __m512h, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and @@ -13335,10 +13571,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_epi16( #[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_epi16( - k: __mmask32, - a: __m512h, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundph_epi16(k: __mmask32, a: __m512h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epi16::(_mm512_setzero_si512(), k, a) } @@ -13351,7 +13584,7 @@ pub unsafe fn _mm512_maskz_cvt_roundph_epi16( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_epu16(a: __m128h) -> __m128i { +pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i { _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a) } @@ -13364,8 +13597,8 @@ pub unsafe fn _mm_cvtph_epu16(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) +pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, @@ -13376,7 +13609,7 @@ pub unsafe fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a) } @@ -13388,7 +13621,7 @@ pub unsafe fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_epu16(a: __m256h) -> __m256i { +pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i { _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a) } @@ -13401,8 +13634,8 @@ pub unsafe fn _mm256_cvtph_epu16(a: __m256h) -> 
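// Editorial note: the caller-visible payoff of this change -- a function that
// itself enables the same target features can now call the converted
// intrinsics (for example the `_mm256_cvtph_epi16` above) without an `unsafe`
// block. Hedged sketch only: it needs a nightly toolchain because the
// avx512fp16 intrinsics are still unstable (feature `stdarch_x86_avx512_f16`,
// tracking issue 127213), and it assumes this patch has landed.
#![feature(stdarch_x86_avx512_f16)]

use core::arch::x86_64::{__m256h, __m256i, _mm256_cvtph_epi16};

#[target_feature(enable = "avx512fp16,avx512vl")]
fn halves_to_words(v: __m256h) -> __m256i {
    // Previously this call site had to be written as `unsafe { ... }`.
    _mm256_cvtph_epi16(v)
}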
__m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { - transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) +pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, @@ -13413,7 +13646,7 @@ pub unsafe fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { +pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a) } @@ -13425,7 +13658,7 @@ pub unsafe fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { +pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13438,13 +13671,15 @@ pub unsafe fn _mm512_cvtph_epu16(a: __m512h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { - transmute(vcvtph2uw_512( - a, - src.as_u16x32(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvtph2uw_512( + a, + src.as_u16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, @@ -13455,7 +13690,7 @@ pub unsafe fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { +pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a) } @@ -13476,7 +13711,7 @@ pub unsafe fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { #[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_epu16(a: __m512h) -> __m512i { +pub fn _mm512_cvt_roundph_epu16(a: __m512h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epu16::(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13499,13 +13734,15 @@ pub unsafe fn _mm512_cvt_roundph_epu16(a: __m512h) -> __m51 #[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_epu16( +pub fn _mm512_mask_cvt_roundph_epu16( src: __m512i, k: __mmask32, a: __m512h, ) -> __m512i { - 
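// Editorial note: the three variants of every conversion are related in a
// fixed way, which is why only the masked form calls the raw intrinsic: the
// plain form is the masked form with an all-ones mask (so the source operand
// is never observed, hence the `undefined`/zeroed placeholder), and the maskz
// form is the masked form with an all-zero source. Scalar model of one lane,
// with `f32` standing in for `f16`:
fn cvt_mask_lane(src: u16, keep: bool, a: f32) -> u16 {
    if keep { a.round_ties_even() as u16 } else { src }
}

fn cvt_lane(a: f32) -> u16 {
    cvt_mask_lane(0 /* never observed: the mask bit is set */, true, a)
}

fn cvt_maskz_lane(keep: bool, a: f32) -> u16 {
    cvt_mask_lane(0 /* zero is merged where the mask bit is clear */, keep, a)
}

fn main() {
    assert_eq!(cvt_lane(2.6), 3);
    assert_eq!(cvt_maskz_lane(false, 2.6), 0); // masked-off lane is zeroed
    assert_eq!(cvt_mask_lane(7, false, 2.6), 7); // masked-off lane keeps src
}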
static_assert_rounding!(ROUNDING); - transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, @@ -13525,10 +13762,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_epu16( #[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_epu16( - k: __mmask32, - a: __m512h, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundph_epu16(k: __mmask32, a: __m512h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epu16::(_mm512_setzero_si512(), k, a) } @@ -13541,7 +13775,7 @@ pub unsafe fn _mm512_maskz_cvt_roundph_epu16( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttph_epi16(a: __m128h) -> __m128i { +pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i { _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a) } @@ -13554,8 +13788,8 @@ pub unsafe fn _mm_cvttph_epi16(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvttph2w_128(a, src.as_i16x8(), k)) +pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with @@ -13567,7 +13801,7 @@ pub unsafe fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a) } @@ -13579,7 +13813,7 @@ pub unsafe fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { +pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a) } @@ -13592,8 +13826,8 @@ pub unsafe fn _mm256_cvttph_epi16(a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { - transmute(vcvttph2w_256(a, src.as_i16x16(), k)) +pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with @@ -13605,7 +13839,7 @@ pub unsafe fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) - #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, 
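// Editorial note: the hunks below move on to the `cvtt*` conversions. The
// difference from `cvt*` is the rounding behaviour: `cvt*` rounds according to
// a rounding mode (round-to-nearest-even under the default MXCSR state, or the
// explicit ROUNDING parameter of the *_round_* forms), while `cvtt*` always
// truncates toward zero. Scalar model:
fn cvt_model(x: f32) -> i16 {
    x.round_ties_even() as i16 // cvt: rounded
}

fn cvtt_model(x: f32) -> i16 {
    x.trunc() as i16 // cvtt: truncated toward zero
}

fn main() {
    assert_eq!(cvt_model(2.5), 2); // ties round to even
    assert_eq!(cvt_model(3.5), 4);
    assert_eq!(cvtt_model(2.9), 2);
    assert_eq!(cvtt_model(-2.9), -2);
}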
assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { +pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a) } @@ -13617,7 +13851,7 @@ pub unsafe fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { +pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13630,13 +13864,15 @@ pub unsafe fn _mm512_cvttph_epi16(a: __m512h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { - transmute(vcvttph2w_512( - a, - src.as_i16x32(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvttph2w_512( + a, + src.as_i16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with @@ -13648,7 +13884,7 @@ pub unsafe fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) - #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2w))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { +pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a) } @@ -13663,7 +13899,7 @@ pub unsafe fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtt_roundph_epi16(a: __m512h) -> __m512i { +pub fn _mm512_cvtt_roundph_epi16(a: __m512h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epi16::(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13680,13 +13916,15 @@ pub unsafe fn _mm512_cvtt_roundph_epi16(a: __m512h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtt_roundph_epi16( +pub fn _mm512_mask_cvtt_roundph_epi16( src: __m512i, k: __mmask32, a: __m512h, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with @@ -13701,7 +13939,7 @@ pub unsafe fn _mm512_mask_cvtt_roundph_epi16( #[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtt_roundph_epi16(k: __mmask32, a: __m512h) -> __m512i { +pub fn _mm512_maskz_cvtt_roundph_epi16(k: __mmask32, a: __m512h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epi16::(_mm512_setzero_si512(), k, a) } @@ -13714,7 
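// Editorial note: the truncating *_round_* conversions take an `SAE` const
// parameter instead of `ROUNDING` -- truncation fixes the rounding behaviour,
// so the only remaining choice is whether to suppress floating-point
// exceptions. The `SAE = 8` in the attributes is `_MM_FROUND_NO_EXC`; the
// alternative is `_MM_FROUND_CUR_DIRECTION` (assumed here to be the other
// value `static_assert_sae!` accepts). x86_64-only check of the encodings:
use core::arch::x86_64::{_MM_FROUND_CUR_DIRECTION, _MM_FROUND_NO_EXC};

fn main() {
    assert_eq!(_MM_FROUND_NO_EXC, 8); // SAE = 8: suppress all exceptions
    assert_eq!(_MM_FROUND_CUR_DIRECTION, 4); // keep the default exception behaviour
}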
+13952,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundph_epi16(k: __mmask32, a: _ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttph_epu16(a: __m128h) -> __m128i { +pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i { _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a) } @@ -13727,8 +13965,8 @@ pub unsafe fn _mm_cvttph_epu16(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) +pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with @@ -13740,7 +13978,7 @@ pub unsafe fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a) } @@ -13752,7 +13990,7 @@ pub unsafe fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvttph_epu16(a: __m256h) -> __m256i { +pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i { _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a) } @@ -13765,8 +14003,8 @@ pub unsafe fn _mm256_cvttph_epu16(a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { - transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) +pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i { + unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with @@ -13778,7 +14016,7 @@ pub unsafe fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) - #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i { +pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i { _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a) } @@ -13790,7 +14028,7 @@ pub unsafe fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvttph_epu16(a: __m512h) -> __m512i { +pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i { _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13803,13 +14041,15 @@ pub unsafe fn _mm512_cvttph_epu16(a: 
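// Editorial note: the bodies transmute between the public opaque vector types
// (__m128i, __m256i, __m512i) and lane-typed internal vectors (the
// `as_u16x8()`, `as_i32x4()`, ... helpers) because the raw conversion
// intrinsics are declared over typed lanes. A simplified, self-contained
// stand-in for that idea (not the real stdarch types):
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(transparent)]
struct M128i([u8; 16]); // opaque bag of 128 bits, like __m128i

#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(transparent)]
struct U16x8([u16; 8]); // lane-typed view, like an internal u16x8

fn as_u16x8(v: M128i) -> U16x8 {
    // SAFETY: both wrappers are #[repr(transparent)] over 16 bytes, so
    // reinterpreting the bits is sound; this mirrors the `transmute(...)`
    // calls in the wrapper bodies.
    unsafe { core::mem::transmute(v) }
}

fn main() {
    // On a little-endian target each pair of bytes forms one u16 lane.
    let raw = M128i([1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0]);
    assert_eq!(as_u16x8(raw), U16x8([1, 2, 3, 4, 5, 6, 7, 8]));
}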
__m512h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { - transmute(vcvttph2uw_512( - a, - src.as_u16x32(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i { + unsafe { + transmute(vcvttph2uw_512( + a, + src.as_u16x32(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with @@ -13821,7 +14061,7 @@ pub unsafe fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) - #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2uw))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i { +pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i { _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a) } @@ -13836,7 +14076,7 @@ pub unsafe fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtt_roundph_epu16(a: __m512h) -> __m512i { +pub fn _mm512_cvtt_roundph_epu16(a: __m512h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epu16::(_mm512_undefined_epi32(), 0xffffffff, a) } @@ -13853,13 +14093,15 @@ pub unsafe fn _mm512_cvtt_roundph_epu16(a: __m512h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtt_roundph_epu16( +pub fn _mm512_mask_cvtt_roundph_epu16( src: __m512i, k: __mmask32, a: __m512h, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with @@ -13874,7 +14116,7 @@ pub unsafe fn _mm512_mask_cvtt_roundph_epu16( #[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtt_roundph_epu16(k: __mmask32, a: __m512h) -> __m512i { +pub fn _mm512_maskz_cvtt_roundph_epu16(k: __mmask32, a: __m512h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epu16::(_mm512_setzero_si512(), k, a) } @@ -13887,7 +14129,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundph_epu16(k: __mmask32, a: _ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_epi32(a: __m128h) -> __m128i { +pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i { _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a) } @@ -13899,8 +14141,8 @@ pub unsafe fn _mm_cvtph_epi32(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) 
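// Editorial note: the mask literals passed by the plain variants simply set
// every lane bit for the vector width at hand -- 0xff for the 8 lanes of a
// __m128h, 0xffff for the 16 lanes of a __m256h, 0xffffffff for the 32 lanes
// of a __m512h (mask bits beyond the lane count, as in the narrower
// conversions below, are ignored). A small helper makes the relationship
// explicit:
fn all_lanes(lanes: u32) -> u64 {
    if lanes >= 64 { u64::MAX } else { (1u64 << lanes) - 1 }
}

fn main() {
    assert_eq!(all_lanes(8), 0xff); // __mmask8 covering a __m128h
    assert_eq!(all_lanes(16), 0xffff); // __mmask16 covering a __m256h
    assert_eq!(all_lanes(32), 0xffff_ffff); // __mmask32 covering a __m512h
}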
+pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the @@ -13911,7 +14153,7 @@ pub unsafe fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a) } @@ -13923,7 +14165,7 @@ pub unsafe fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_epi32(a: __m128h) -> __m256i { +pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i { _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a) } @@ -13935,8 +14177,8 @@ pub unsafe fn _mm256_cvtph_epi32(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) +pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the @@ -13947,7 +14189,7 @@ pub unsafe fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a) } @@ -13959,7 +14201,7 @@ pub unsafe fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_epi32(a: __m256h) -> __m512i { +pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i { _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a) } @@ -13971,13 +14213,15 @@ pub unsafe fn _mm512_cvtph_epi32(a: __m256h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { - transmute(vcvtph2dq_512( - a, - src.as_i32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { + unsafe { + transmute(vcvtph2dq_512( + a, + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the @@ -13988,7 +14232,7 @@ pub unsafe fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> #[target_feature(enable = "avx512fp16")] 
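// Editorial note: in the ph -> epi32 family each 16-bit input widens to a
// 32-bit lane, so the source is half the width of the destination: the
// __m128h -> __m128i form reads only the 4 low halves, __m128h -> __m256i
// reads 8, and __m256h -> __m512i reads all 16. Scalar model of that
// bookkeeping, with `f32` standing in for `f16`:
fn cvtph_epi32_model<const OUT_LANES: usize>(a: &[f32]) -> Vec<i32> {
    // Only the first OUT_LANES inputs are consumed.
    a[..OUT_LANES]
        .iter()
        .map(|&x| x.round_ties_even() as i32)
        .collect()
}

fn main() {
    let halves = [0.5f32, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]; // the 8 lanes of a __m128h
    assert_eq!(cvtph_epi32_model::<4>(&halves), vec![0, 2, 2, 4]); // -> __m128i
    assert_eq!(cvtph_epi32_model::<8>(&halves).len(), 8); // -> __m256i
}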
#[cfg_attr(test, assert_instr(vcvtph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i { +pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i { _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a) } @@ -14009,7 +14253,7 @@ pub unsafe fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i { #[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_epi32(a: __m256h) -> __m512i { +pub fn _mm512_cvt_roundph_epi32(a: __m256h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epi32::(_mm512_undefined_epi32(), 0xffff, a) } @@ -14031,13 +14275,15 @@ pub unsafe fn _mm512_cvt_roundph_epi32(a: __m256h) -> __m51 #[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_epi32( +pub fn _mm512_mask_cvt_roundph_epi32( src: __m512i, k: __mmask16, a: __m256h, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the @@ -14057,10 +14303,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_epi32( #[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_epi32( - k: __mmask16, - a: __m256h, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundph_epi32(k: __mmask16, a: __m256h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epi32::(_mm512_setzero_si512(), k, a) } @@ -14073,8 +14316,8 @@ pub unsafe fn _mm512_maskz_cvt_roundph_epi32( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2si))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_i32(a: __m128h) -> i32 { - vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsh_i32(a: __m128h) -> i32 { + unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store @@ -14094,9 +14337,11 @@ pub unsafe fn _mm_cvtsh_i32(a: __m128h) -> i32 { #[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsh_i32(a: __m128h) -> i32 { - static_assert_rounding!(ROUNDING); - vcvtsh2si32(a, ROUNDING) +pub fn _mm_cvt_roundsh_i32(a: __m128h) -> i32 { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsh2si32(a, ROUNDING) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the @@ -14107,7 +14352,7 @@ pub unsafe fn _mm_cvt_roundsh_i32(a: __m128h) -> i32 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_epu32(a: __m128h) -> __m128i { +pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i { _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a) } @@ -14119,8 +14364,8 @@ pub unsafe 
fn _mm_cvtph_epu32(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) +pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store @@ -14131,7 +14376,7 @@ pub unsafe fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a) } @@ -14143,7 +14388,7 @@ pub unsafe fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_epu32(a: __m128h) -> __m256i { +pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i { _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a) } @@ -14155,8 +14400,8 @@ pub unsafe fn _mm256_cvtph_epu32(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) +pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store @@ -14167,7 +14412,7 @@ pub unsafe fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a) } @@ -14179,7 +14424,7 @@ pub unsafe fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_epu32(a: __m256h) -> __m512i { +pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i { _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a) } @@ -14191,13 +14436,15 @@ pub unsafe fn _mm512_cvtph_epu32(a: __m256h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { - transmute(vcvtph2udq_512( - a, - src.as_u32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { + unsafe { + 
transmute(vcvtph2udq_512( + a, + src.as_u32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store @@ -14208,7 +14455,7 @@ pub unsafe fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i { +pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i { _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a) } @@ -14229,7 +14476,7 @@ pub unsafe fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i { #[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_epu32(a: __m256h) -> __m512i { +pub fn _mm512_cvt_roundph_epu32(a: __m256h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epu32::(_mm512_undefined_epi32(), 0xffff, a) } @@ -14251,13 +14498,15 @@ pub unsafe fn _mm512_cvt_roundph_epu32(a: __m256h) -> __m51 #[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_epu32( +pub fn _mm512_mask_cvt_roundph_epu32( src: __m512i, k: __mmask16, a: __m256h, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store @@ -14277,10 +14526,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_epu32( #[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_epu32( - k: __mmask16, - a: __m256h, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundph_epu32(k: __mmask16, a: __m256h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epu32::(_mm512_setzero_si512(), k, a) } @@ -14293,8 +14539,8 @@ pub unsafe fn _mm512_maskz_cvt_roundph_epu32( #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2usi))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_u32(a: __m128h) -> u32 { - vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsh_u32(a: __m128h) -> u32 { + unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store @@ -14314,9 +14560,11 @@ pub unsafe fn _mm_cvtsh_u32(a: __m128h) -> u32 { #[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsh_u32(a: __m128h) -> u32 { - static_assert_rounding!(ROUNDING); - vcvtsh2usi32(a, ROUNDING) +pub fn _mm_cvt_roundsh_u32(a: __m128h) -> u32 { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsh2usi32(a, ROUNDING) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and @@ -14327,7 +14575,7 @@ pub unsafe fn 
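// Editorial note: making these wrappers safe moves the proof obligation, it
// does not remove it -- code that is not itself compiled with the features
// enabled still needs `unsafe` (plus runtime detection) to call them. The
// usual shape is sketched below for x86_64, using the stable avx2 feature as
// a stand-in so the sketch compiles on current stable Rust:
#[target_feature(enable = "avx2")]
fn sum_avx2(data: &[u32]) -> u32 {
    // A real kernel would use intrinsics here; the point is the call protocol.
    data.iter().copied().sum()
}

fn sum_scalar(data: &[u32]) -> u32 {
    data.iter().copied().sum()
}

fn sum(data: &[u32]) -> u32 {
    if std::arch::is_x86_feature_detected!("avx2") {
        // SAFETY: the required feature was detected at runtime just above.
        unsafe { sum_avx2(data) }
    } else {
        sum_scalar(data)
    }
}

fn main() {
    assert_eq!(sum(&[1, 2, 3, 4]), 10);
}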
_mm_cvt_roundsh_u32(a: __m128h) -> u32 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttph_epi32(a: __m128h) -> __m128i { +pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i { _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a) } @@ -14339,8 +14587,8 @@ pub unsafe fn _mm_cvttph_epi32(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) +pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and @@ -14351,7 +14599,7 @@ pub unsafe fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a) } @@ -14363,7 +14611,7 @@ pub unsafe fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { +pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a) } @@ -14375,8 +14623,8 @@ pub unsafe fn _mm256_cvttph_epi32(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) +pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and @@ -14387,7 +14635,7 @@ pub unsafe fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a) } @@ -14399,7 +14647,7 @@ pub unsafe fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { +pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a) } @@ -14411,13 +14659,15 @@ pub unsafe fn _mm512_cvttph_epi32(a: __m256h) -> __m512i { 
#[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { - transmute(vcvttph2dq_512( - a, - src.as_i32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { + unsafe { + transmute(vcvttph2dq_512( + a, + src.as_i32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and @@ -14428,7 +14678,7 @@ pub unsafe fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) - #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2dq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i { +pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i { _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a) } @@ -14443,7 +14693,7 @@ pub unsafe fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtt_roundph_epi32(a: __m256h) -> __m512i { +pub fn _mm512_cvtt_roundph_epi32(a: __m256h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epi32::(_mm512_undefined_epi32(), 0xffff, a) } @@ -14459,13 +14709,15 @@ pub unsafe fn _mm512_cvtt_roundph_epi32(a: __m256h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtt_roundph_epi32( +pub fn _mm512_mask_cvtt_roundph_epi32( src: __m512i, k: __mmask16, a: __m256h, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and @@ -14479,7 +14731,7 @@ pub unsafe fn _mm512_mask_cvtt_roundph_epi32( #[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtt_roundph_epi32(k: __mmask16, a: __m256h) -> __m512i { +pub fn _mm512_maskz_cvtt_roundph_epi32(k: __mmask16, a: __m256h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epi32::(_mm512_setzero_si512(), k, a) } @@ -14492,8 +14744,8 @@ pub unsafe fn _mm512_maskz_cvtt_roundph_epi32(k: __mmask16, a: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttsh2si))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttsh_i32(a: __m128h) -> i32 { - vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsh_i32(a: __m128h) -> i32 { + unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store @@ -14507,9 +14759,11 @@ pub unsafe fn _mm_cvttsh_i32(a: __m128h) -> i32 { #[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe 
fn _mm_cvtt_roundsh_i32(a: __m128h) -> i32 { - static_assert_sae!(SAE); - vcvttsh2si32(a, SAE) +pub fn _mm_cvtt_roundsh_i32(a: __m128h) -> i32 { + unsafe { + static_assert_sae!(SAE); + vcvttsh2si32(a, SAE) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and @@ -14520,7 +14774,7 @@ pub unsafe fn _mm_cvtt_roundsh_i32(a: __m128h) -> i32 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttph_epu32(a: __m128h) -> __m128i { +pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i { _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a) } @@ -14532,8 +14786,8 @@ pub unsafe fn _mm_cvttph_epu32(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) +pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and @@ -14544,7 +14798,7 @@ pub unsafe fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a) } @@ -14556,7 +14810,7 @@ pub unsafe fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { +pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a) } @@ -14568,8 +14822,8 @@ pub unsafe fn _mm256_cvttph_epu32(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) +pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and @@ -14580,7 +14834,7 @@ pub unsafe fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a) } @@ -14592,7 +14846,7 @@ pub unsafe fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i { 
#[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { +pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a) } @@ -14604,13 +14858,15 @@ pub unsafe fn _mm512_cvttph_epu32(a: __m256h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { - transmute(vcvttph2udq_512( - a, - src.as_u32x16(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i { + unsafe { + transmute(vcvttph2udq_512( + a, + src.as_u32x16(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and @@ -14621,7 +14877,7 @@ pub unsafe fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) - #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2udq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { +pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a) } @@ -14636,7 +14892,7 @@ pub unsafe fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtt_roundph_epu32(a: __m256h) -> __m512i { +pub fn _mm512_cvtt_roundph_epu32(a: __m256h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epu32::(_mm512_undefined_epi32(), 0xffff, a) } @@ -14652,13 +14908,15 @@ pub unsafe fn _mm512_cvtt_roundph_epu32(a: __m256h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtt_roundph_epu32( +pub fn _mm512_mask_cvtt_roundph_epu32( src: __m512i, k: __mmask16, a: __m256h, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and @@ -14672,7 +14930,7 @@ pub unsafe fn _mm512_mask_cvtt_roundph_epu32( #[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtt_roundph_epu32(k: __mmask16, a: __m256h) -> __m512i { +pub fn _mm512_maskz_cvtt_roundph_epu32(k: __mmask16, a: __m256h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epu32::(_mm512_setzero_si512(), k, a) } @@ -14685,8 +14943,8 @@ pub unsafe fn _mm512_maskz_cvtt_roundph_epu32(k: __mmask16, a: _ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttsh2usi))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttsh_u32(a: __m128h) -> u32 { - vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) +pub fn 
_mm_cvttsh_u32(a: __m128h) -> u32 { + unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store @@ -14700,9 +14958,11 @@ pub unsafe fn _mm_cvttsh_u32(a: __m128h) -> u32 { #[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtt_roundsh_u32(a: __m128h) -> u32 { - static_assert_sae!(SAE); - vcvttsh2usi32(a, SAE) +pub fn _mm_cvtt_roundsh_u32(a: __m128h) -> u32 { + unsafe { + static_assert_sae!(SAE); + vcvttsh2usi32(a, SAE) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and @@ -14713,7 +14973,7 @@ pub unsafe fn _mm_cvtt_roundsh_u32(a: __m128h) -> u32 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_epi64(a: __m128h) -> __m128i { +pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i { _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a) } @@ -14725,8 +14985,8 @@ pub unsafe fn _mm_cvtph_epi64(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) +pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and @@ -14737,7 +14997,7 @@ pub unsafe fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a) } @@ -14749,7 +15009,7 @@ pub unsafe fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { +pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a) } @@ -14761,8 +15021,8 @@ pub unsafe fn _mm256_cvtph_epi64(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) +pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and @@ -14773,7 +15033,7 @@ pub unsafe fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = 
"stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a) } @@ -14785,7 +15045,7 @@ pub unsafe fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { +pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a) } @@ -14797,13 +15057,15 @@ pub unsafe fn _mm512_cvtph_epi64(a: __m128h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { - transmute(vcvtph2qq_512( - a, - src.as_i64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvtph2qq_512( + a, + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and @@ -14814,7 +15076,7 @@ pub unsafe fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { +pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a) } @@ -14835,7 +15097,7 @@ pub unsafe fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i { #[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_epi64(a: __m128h) -> __m512i { +pub fn _mm512_cvt_roundph_epi64(a: __m128h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epi64::(_mm512_undefined_epi32(), 0xff, a) } @@ -14857,13 +15119,15 @@ pub unsafe fn _mm512_cvt_roundph_epi64(a: __m128h) -> __m51 #[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_epi64( +pub fn _mm512_mask_cvt_roundph_epi64( src: __m512i, k: __mmask8, a: __m128h, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and @@ -14883,10 +15147,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_epi64( #[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_epi64( - k: __mmask8, - a: __m128h, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundph_epi64(k: __mmask8, a: __m128h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epi64::(_mm512_setzero_si512(), k, a) } @@ -14899,7 +15160,7 @@ pub unsafe fn 
_mm512_maskz_cvt_roundph_epi64( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_epu64(a: __m128h) -> __m128i { +pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i { _mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a) } @@ -14911,8 +15172,8 @@ pub unsafe fn _mm_cvtph_epu64(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) +pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and @@ -14923,7 +15184,7 @@ pub unsafe fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a) } @@ -14935,7 +15196,7 @@ pub unsafe fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { +pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { _mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a) } @@ -14947,8 +15208,8 @@ pub unsafe fn _mm256_cvtph_epu64(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) +pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and @@ -14959,7 +15220,7 @@ pub unsafe fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a) } @@ -14971,7 +15232,7 @@ pub unsafe fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { +pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { _mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a) } @@ -14983,13 +15244,15 @@ pub unsafe fn _mm512_cvtph_epu64(a: __m128h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, 
assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { - transmute(vcvtph2uqq_512( - a, - src.as_u64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvtph2uqq_512( + a, + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and @@ -15000,7 +15263,7 @@ pub unsafe fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { +pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { _mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a) } @@ -15021,7 +15284,7 @@ pub unsafe fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i { #[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_epu64(a: __m128h) -> __m512i { +pub fn _mm512_cvt_roundph_epu64(a: __m128h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epu64::(_mm512_undefined_epi32(), 0xff, a) } @@ -15043,13 +15306,15 @@ pub unsafe fn _mm512_cvt_roundph_epu64(a: __m128h) -> __m51 #[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_epu64( +pub fn _mm512_mask_cvt_roundph_epu64( src: __m512i, k: __mmask8, a: __m128h, ) -> __m512i { - static_assert_rounding!(ROUNDING); - transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING)) + unsafe { + static_assert_rounding!(ROUNDING); + transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and @@ -15069,10 +15334,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_epu64( #[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_epu64( - k: __mmask8, - a: __m128h, -) -> __m512i { +pub fn _mm512_maskz_cvt_roundph_epu64(k: __mmask8, a: __m128h) -> __m512i { static_assert_rounding!(ROUNDING); _mm512_mask_cvt_roundph_epu64::(_mm512_setzero_si512(), k, a) } @@ -15085,7 +15347,7 @@ pub unsafe fn _mm512_maskz_cvt_roundph_epu64( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttph_epi64(a: __m128h) -> __m128i { +pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i { _mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a) } @@ -15097,8 +15359,8 @@ pub unsafe fn _mm_cvttph_epi64(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) +pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: 
__m128h) -> __m128i { + unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and @@ -15109,7 +15371,7 @@ pub unsafe fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a) } @@ -15121,7 +15383,7 @@ pub unsafe fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { +pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { _mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a) } @@ -15133,8 +15395,8 @@ pub unsafe fn _mm256_cvttph_epi64(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) +pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and @@ -15145,7 +15407,7 @@ pub unsafe fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a) } @@ -15157,7 +15419,7 @@ pub unsafe fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { +pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { _mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a) } @@ -15169,13 +15431,15 @@ pub unsafe fn _mm512_cvttph_epi64(a: __m128h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { - transmute(vcvttph2qq_512( - a, - src.as_i64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvttph2qq_512( + a, + src.as_i64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and @@ -15186,7 +15450,7 @@ pub unsafe fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, 
assert_instr(vcvttph2qq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { +pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { _mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a) } @@ -15201,7 +15465,7 @@ pub unsafe fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtt_roundph_epi64(a: __m128h) -> __m512i { +pub fn _mm512_cvtt_roundph_epi64(a: __m128h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epi64::(_mm512_undefined_epi32(), 0xff, a) } @@ -15217,13 +15481,15 @@ pub unsafe fn _mm512_cvtt_roundph_epi64(a: __m128h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtt_roundph_epi64( +pub fn _mm512_mask_cvtt_roundph_epi64( src: __m512i, k: __mmask8, a: __m128h, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and @@ -15237,7 +15503,7 @@ pub unsafe fn _mm512_mask_cvtt_roundph_epi64( #[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtt_roundph_epi64(k: __mmask8, a: __m128h) -> __m512i { +pub fn _mm512_maskz_cvtt_roundph_epi64(k: __mmask8, a: __m128h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epi64::(_mm512_setzero_si512(), k, a) } @@ -15250,7 +15516,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundph_epi64(k: __mmask8, a: __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttph_epu64(a: __m128h) -> __m128i { +pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i { _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a) } @@ -15262,8 +15528,8 @@ pub unsafe fn _mm_cvttph_epu64(a: __m128h) -> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { - transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) +pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i { + unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and @@ -15274,7 +15540,7 @@ pub unsafe fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i { +pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i { _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a) } @@ -15286,7 +15552,7 @@ pub unsafe fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) 
-> __m128i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { +pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a) } @@ -15298,8 +15564,8 @@ pub unsafe fn _mm256_cvttph_epu64(a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { - transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) +pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i { + unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and @@ -15310,7 +15576,7 @@ pub unsafe fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { +pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a) } @@ -15322,7 +15588,7 @@ pub unsafe fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { +pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a) } @@ -15334,13 +15600,15 @@ pub unsafe fn _mm512_cvttph_epu64(a: __m128h) -> __m512i { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { - transmute(vcvttph2uqq_512( - a, - src.as_u64x8(), - k, - _MM_FROUND_CUR_DIRECTION, - )) +pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i { + unsafe { + transmute(vcvttph2uqq_512( + a, + src.as_u64x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and @@ -15351,7 +15619,7 @@ pub unsafe fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttph2uqq))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { +pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a) } @@ -15366,7 +15634,7 @@ pub unsafe fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtt_roundph_epu64(a: __m128h) -> __m512i { +pub fn _mm512_cvtt_roundph_epu64(a: __m128h) -> __m512i { static_assert_sae!(SAE); 
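The truncating `_cvtt_round*` forms, by contrast, take only an SAE (suppress-all-exceptions) flag, since truncation already pins the rounding direction toward zero. A short sketch, again assuming nightly with the unstable fp16 feature; `truncate_no_exceptions` is illustrative only.

    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::{__m128h, __m512i, _mm512_cvtt_roundph_epu64, _MM_FROUND_NO_EXC};

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx512fp16")]
    fn truncate_no_exceptions(a: __m128h) -> __m512i {
        // SAE must be _MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION (checked by static_assert_sae!).
        _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a)
    }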
_mm512_mask_cvtt_roundph_epu64::(_mm512_undefined_epi32(), 0xff, a) } @@ -15382,13 +15650,15 @@ pub unsafe fn _mm512_cvtt_roundph_epu64(a: __m128h) -> __m512i { #[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtt_roundph_epu64( +pub fn _mm512_mask_cvtt_roundph_epu64( src: __m512i, k: __mmask8, a: __m128h, ) -> __m512i { - static_assert_sae!(SAE); - transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE)) + unsafe { + static_assert_sae!(SAE); + transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE)) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and @@ -15402,7 +15672,7 @@ pub unsafe fn _mm512_mask_cvtt_roundph_epu64( #[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtt_roundph_epu64(k: __mmask8, a: __m128h) -> __m512i { +pub fn _mm512_maskz_cvtt_roundph_epu64(k: __mmask8, a: __m128h) -> __m512i { static_assert_sae!(SAE); _mm512_mask_cvtt_roundph_epu64::(_mm512_setzero_si512(), k, a) } @@ -15415,7 +15685,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundph_epu64(k: __mmask8, a: __ #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtxph_ps(a: __m128h) -> __m128 { +pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 { _mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a) } @@ -15428,8 +15698,8 @@ pub unsafe fn _mm_cvtxph_ps(a: __m128h) -> __m128 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 { - vcvtph2psx_128(a, src, k) +pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 { + unsafe { vcvtph2psx_128(a, src, k) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -15441,7 +15711,7 @@ pub unsafe fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { +pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { _mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a) } @@ -15453,7 +15723,7 @@ pub unsafe fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { +pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a) } @@ -15466,8 +15736,8 @@ pub unsafe fn _mm256_cvtxph_ps(a: __m128h) -> __m256 { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 { - vcvtph2psx_256(a, src, k) +pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 { + unsafe { vcvtph2psx_256(a, src, k) } } /// 
Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -15479,7 +15749,7 @@ pub unsafe fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { +pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a) } @@ -15491,7 +15761,7 @@ pub unsafe fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { +pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a) } @@ -15504,8 +15774,8 @@ pub unsafe fn _mm512_cvtxph_ps(a: __m256h) -> __m512 { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 { - vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 { + unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -15517,7 +15787,7 @@ pub unsafe fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2psx))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { +pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a) } @@ -15532,7 +15802,7 @@ pub unsafe fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 { #[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtx_roundph_ps(a: __m256h) -> __m512 { +pub fn _mm512_cvtx_roundph_ps(a: __m256h) -> __m512 { static_assert_sae!(SAE); _mm512_mask_cvtx_roundph_ps::(_mm512_setzero_ps(), 0xffff, a) } @@ -15549,13 +15819,15 @@ pub unsafe fn _mm512_cvtx_roundph_ps(a: __m256h) -> __m512 { #[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtx_roundph_ps( +pub fn _mm512_mask_cvtx_roundph_ps( src: __m512, k: __mmask16, a: __m256h, ) -> __m512 { - static_assert_sae!(SAE); - vcvtph2psx_512(a, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vcvtph2psx_512(a, src, k, SAE) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -15570,7 +15842,7 @@ pub unsafe fn _mm512_mask_cvtx_roundph_ps( #[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtx_roundph_ps(k: __mmask16, a: __m256h) -> __m512 { +pub fn _mm512_maskz_cvtx_roundph_ps(k: __mmask16, a: __m256h) -> __m512 { static_assert_sae!(SAE); 
_mm512_mask_cvtx_roundph_ps::(_mm512_setzero_ps(), k, a) } @@ -15584,7 +15856,7 @@ pub unsafe fn _mm512_maskz_cvtx_roundph_ps(k: __mmask16, a: __m2 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2ss))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { +pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { _mm_mask_cvtsh_ss(a, 0xff, a, b) } @@ -15598,8 +15870,8 @@ pub unsafe fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2ss))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 { - vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 { + unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) @@ -15612,7 +15884,7 @@ pub unsafe fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2ss))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { +pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { _mm_mask_cvtsh_ss(_mm_setzero_ps(), k, a, b) } @@ -15628,7 +15900,7 @@ pub unsafe fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { #[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsh_ss(a: __m128, b: __m128h) -> __m128 { +pub fn _mm_cvt_roundsh_ss(a: __m128, b: __m128h) -> __m128 { static_assert_sae!(SAE); _mm_mask_cvt_roundsh_ss::(_mm_undefined_ps(), 0xff, a, b) } @@ -15646,14 +15918,16 @@ pub unsafe fn _mm_cvt_roundsh_ss(a: __m128, b: __m128h) -> __m12 #[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvt_roundsh_ss( +pub fn _mm_mask_cvt_roundsh_ss( src: __m128, k: __mmask8, a: __m128, b: __m128h, ) -> __m128 { - static_assert_sae!(SAE); - vcvtsh2ss(a, b, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vcvtsh2ss(a, b, src, k, SAE) + } } /// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit) @@ -15669,11 +15943,7 @@ pub unsafe fn _mm_mask_cvt_roundsh_ss( #[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvt_roundsh_ss( - k: __mmask8, - a: __m128, - b: __m128h, -) -> __m128 { +pub fn _mm_maskz_cvt_roundsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 { static_assert_sae!(SAE); _mm_mask_cvt_roundsh_ss::(_mm_setzero_ps(), k, a, b) } @@ -15686,7 +15956,7 @@ pub unsafe fn _mm_maskz_cvt_roundsh_ss( #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtph_pd(a: __m128h) -> __m128d { +pub fn _mm_cvtph_pd(a: __m128h) -> __m128d { _mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a) } @@ -15699,8 +15969,8 @@ pub unsafe fn _mm_cvtph_pd(a: __m128h) -> 
__m128d { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d { - vcvtph2pd_128(a, src, k) +pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d { + unsafe { vcvtph2pd_128(a, src, k) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) @@ -15712,7 +15982,7 @@ pub unsafe fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128 #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { +pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { _mm_mask_cvtph_pd(_mm_setzero_pd(), k, a) } @@ -15724,7 +15994,7 @@ pub unsafe fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtph_pd(a: __m128h) -> __m256d { +pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d { _mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a) } @@ -15737,8 +16007,8 @@ pub unsafe fn _mm256_cvtph_pd(a: __m128h) -> __m256d { #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d { - vcvtph2pd_256(a, src, k) +pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d { + unsafe { vcvtph2pd_256(a, src, k) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) @@ -15750,7 +16020,7 @@ pub unsafe fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m #[target_feature(enable = "avx512fp16,avx512vl")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { +pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { _mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a) } @@ -15762,7 +16032,7 @@ pub unsafe fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtph_pd(a: __m128h) -> __m512d { +pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d { _mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a) } @@ -15775,8 +16045,8 @@ pub unsafe fn _mm512_cvtph_pd(a: __m128h) -> __m512d { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d { - vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d { + unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) @@ -15788,7 +16058,7 @@ pub unsafe fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m 
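The `mask`/`maskz` doc comments repeated throughout this file describe the same write-masking rule. A scalar reference model may make it concrete; this is an illustration only, not how the intrinsics are implemented: `f32` stands in for the still-unstable `f16` type and four lanes stand in for the real vector widths.

    // Write-masking, element by element: selected lanes get the converted value,
    // unselected lanes keep `src` ("mask") or become zero ("maskz").
    fn mask_convert_ref(src: [f64; 4], k: u8, a: [f32; 4]) -> [f64; 4] {
        let mut dst = src;
        for i in 0..4 {
            if (k >> i) & 1 == 1 {
                dst[i] = a[i] as f64; // widening conversion of the selected lane
            }
        }
        dst
    }

    fn maskz_convert_ref(k: u8, a: [f32; 4]) -> [f64; 4] {
        mask_convert_ref([0.0; 4], k, a) // maskz: unselected lanes zeroed
    }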
#[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtph2pd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { +pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { _mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a) } @@ -15803,7 +16073,7 @@ pub unsafe fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d { #[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvt_roundph_pd(a: __m128h) -> __m512d { +pub fn _mm512_cvt_roundph_pd(a: __m128h) -> __m512d { static_assert_sae!(SAE); _mm512_mask_cvt_roundph_pd::(_mm512_setzero_pd(), 0xff, a) } @@ -15820,13 +16090,15 @@ pub unsafe fn _mm512_cvt_roundph_pd(a: __m128h) -> __m512d { #[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_mask_cvt_roundph_pd( +pub fn _mm512_mask_cvt_roundph_pd( src: __m512d, k: __mmask8, a: __m128h, ) -> __m512d { - static_assert_sae!(SAE); - vcvtph2pd_512(a, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vcvtph2pd_512(a, src, k, SAE) + } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit) @@ -15841,7 +16113,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_pd( #[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_maskz_cvt_roundph_pd(k: __mmask8, a: __m128h) -> __m512d { +pub fn _mm512_maskz_cvt_roundph_pd(k: __mmask8, a: __m128h) -> __m512d { static_assert_sae!(SAE); _mm512_mask_cvt_roundph_pd::(_mm512_setzero_pd(), k, a) } @@ -15855,7 +16127,7 @@ pub unsafe fn _mm512_maskz_cvt_roundph_pd(k: __mmask8, a: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2sd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { +pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { _mm_mask_cvtsh_sd(a, 0xff, a, b) } @@ -15869,8 +16141,8 @@ pub unsafe fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2sd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d { - vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d { + unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) @@ -15882,7 +16154,7 @@ pub unsafe fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128 #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2sd))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { +pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { _mm_mask_cvtsh_sd(_mm_setzero_pd(), k, a, b) } @@ -15898,7 +16170,7 @@ pub unsafe fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d #[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))] 
#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsh_sd(a: __m128d, b: __m128h) -> __m128d { +pub fn _mm_cvt_roundsh_sd(a: __m128d, b: __m128h) -> __m128d { static_assert_sae!(SAE); _mm_mask_cvt_roundsh_sd::(a, 0xff, a, b) } @@ -15916,14 +16188,16 @@ pub unsafe fn _mm_cvt_roundsh_sd(a: __m128d, b: __m128h) -> __m1 #[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_mask_cvt_roundsh_sd( +pub fn _mm_mask_cvt_roundsh_sd( src: __m128d, k: __mmask8, a: __m128d, b: __m128h, ) -> __m128d { - static_assert_sae!(SAE); - vcvtsh2sd(a, b, src, k, SAE) + unsafe { + static_assert_sae!(SAE); + vcvtsh2sd(a, b, src, k, SAE) + } } /// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit) @@ -15938,11 +16212,7 @@ pub unsafe fn _mm_mask_cvt_roundsh_sd( #[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_maskz_cvt_roundsh_sd( - k: __mmask8, - a: __m128d, - b: __m128h, -) -> __m128d { +pub fn _mm_maskz_cvt_roundsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d { static_assert_sae!(SAE); _mm_mask_cvt_roundsh_sd::(_mm_setzero_pd(), k, a, b) } @@ -15953,8 +16223,8 @@ pub unsafe fn _mm_maskz_cvt_roundsh_sd( #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_h(a: __m128h) -> f16 { - simd_extract!(a, 0) +pub fn _mm_cvtsh_h(a: __m128h) -> f16 { + unsafe { simd_extract!(a, 0) } } /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. @@ -15963,8 +16233,8 @@ pub unsafe fn _mm_cvtsh_h(a: __m128h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm256_cvtsh_h(a: __m256h) -> f16 { - simd_extract!(a, 0) +pub fn _mm256_cvtsh_h(a: __m256h) -> f16 { + unsafe { simd_extract!(a, 0) } } /// Copy the lower half-precision (16-bit) floating-point element from `a` to `dst`. @@ -15973,8 +16243,8 @@ pub unsafe fn _mm256_cvtsh_h(a: __m256h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm512_cvtsh_h(a: __m512h) -> f16 { - simd_extract!(a, 0) +pub fn _mm512_cvtsh_h(a: __m512h) -> f16 { + unsafe { simd_extract!(a, 0) } } /// Copy the lower 16-bit integer in a to dst. @@ -15983,8 +16253,8 @@ pub unsafe fn _mm512_cvtsh_h(a: __m512h) -> f16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsi128_si16(a: __m128i) -> i16 { - simd_extract!(a.as_i16x8(), 0) +pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 { + unsafe { simd_extract!(a.as_i16x8(), 0) } } /// Copy 16-bit integer a to the lower elements of dst, and zero the upper elements of dst. 
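The two scalar moves here (`_mm_cvtsi128_si16` and `_mm_cvtsi16_si128`) are simple enough that a plain-Rust model captures the documented behavior exactly; an illustrative sketch, with an array standing in for `__m128i` viewed as eight 16-bit lanes.

    // _mm_cvtsi16_si128: place `a` in lane 0, zero the remaining lanes.
    fn cvtsi16_si128_ref(a: i16) -> [i16; 8] {
        let mut v = [0i16; 8];
        v[0] = a;
        v
    }

    // _mm_cvtsi128_si16: read back lane 0.
    fn cvtsi128_si16_ref(v: [i16; 8]) -> i16 {
        v[0]
    }

    // Round-trip: extracting after inserting returns the original value.
    // assert_eq!(cvtsi128_si16_ref(cvtsi16_si128_ref(7)), 7);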
@@ -15993,8 +16263,8 @@ pub unsafe fn _mm_cvtsi128_si16(a: __m128i) -> i16 { #[inline] #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsi16_si128(a: i16) -> __m128i { - transmute(simd_insert!(i16x8::ZERO, 0, a)) +pub fn _mm_cvtsi16_si128(a: i16) -> __m128i { + unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512ifma.rs b/crates/core_arch/src/x86/avx512ifma.rs index 12123c2162..e4e715ae7b 100644 --- a/crates/core_arch/src/x86/avx512ifma.rs +++ b/crates/core_arch/src/x86/avx512ifma.rs @@ -15,8 +15,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512ifma")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - vpmadd52huq_512(a, b, c) +pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { vpmadd52huq_512(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -31,13 +31,8 @@ pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51 #[target_feature(enable = "avx512ifma")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm512_mask_madd52hi_epu64( - a: __m512i, - k: __mmask8, - b: __m512i, - c: __m512i, -) -> __m512i { - simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) +pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -52,13 +47,8 @@ pub unsafe fn _mm512_mask_madd52hi_epu64( #[target_feature(enable = "avx512ifma")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm512_maskz_madd52hi_epu64( - k: __mmask8, - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) +pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -72,8 +62,8 @@ pub unsafe fn _mm512_maskz_madd52hi_epu64( #[target_feature(enable = "avx512ifma")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - vpmadd52luq_512(a, b, c) +pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { vpmadd52luq_512(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -88,13 +78,8 @@ pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51 #[target_feature(enable = "avx512ifma")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm512_mask_madd52lo_epu64( - a: __m512i, - k: __mmask8, - b: __m512i, - c: __m512i, -) -> __m512i { - simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) +pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) } } /// 
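The IFMA doc comments compress a fair amount of arithmetic; a scalar model of one 64-bit lane may help. This illustrates the documented operation (52-bit unsigned multiply giving a 104-bit product, whose high or low 52 bits are added to the accumulator), not the intrinsics themselves.

    const MASK52: u64 = (1 << 52) - 1;

    // madd52hi: a + high 52 bits of (b[51:0] * c[51:0]), per 64-bit lane.
    fn madd52hi_ref(a: u64, b: u64, c: u64) -> u64 {
        let prod = (b & MASK52) as u128 * (c & MASK52) as u128; // up to 104 bits
        a.wrapping_add((prod >> 52) as u64)
    }

    // madd52lo: a + low 52 bits of the same product.
    fn madd52lo_ref(a: u64, b: u64, c: u64) -> u64 {
        let prod = (b & MASK52) as u128 * (c & MASK52) as u128;
        a.wrapping_add((prod as u64) & MASK52)
    }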
Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -109,13 +94,8 @@ pub unsafe fn _mm512_mask_madd52lo_epu64( #[target_feature(enable = "avx512ifma")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm512_maskz_madd52lo_epu64( - k: __mmask8, - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) +pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -132,8 +112,8 @@ pub unsafe fn _mm512_maskz_madd52lo_epu64( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpmadd52huq) )] -pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - vpmadd52huq_256(a, b, c) +pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52huq_256(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -147,8 +127,8 @@ pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> _ #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - vpmadd52huq_256(a, b, c) +pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52huq_256(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -163,13 +143,8 @@ pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25 #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm256_mask_madd52hi_epu64( - a: __m256i, - k: __mmask8, - b: __m256i, - c: __m256i, -) -> __m256i { - simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) +pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -184,13 +159,8 @@ pub unsafe fn _mm256_mask_madd52hi_epu64( #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm256_maskz_madd52hi_epu64( - k: __mmask8, - a: __m256i, - b: __m256i, - c: __m256i, -) -> __m256i { - simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) +pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -207,8 +177,8 @@ pub unsafe fn _mm256_maskz_madd52hi_epu64( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpmadd52luq) )] -pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - vpmadd52luq_256(a, b, c) +pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52luq_256(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -222,8 
+192,8 @@ pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> _ #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - vpmadd52luq_256(a, b, c) +pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { vpmadd52luq_256(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -238,13 +208,8 @@ pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25 #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm256_mask_madd52lo_epu64( - a: __m256i, - k: __mmask8, - b: __m256i, - c: __m256i, -) -> __m256i { - simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) +pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -259,13 +224,8 @@ pub unsafe fn _mm256_mask_madd52lo_epu64( #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm256_maskz_madd52lo_epu64( - k: __mmask8, - a: __m256i, - b: __m256i, - c: __m256i, -) -> __m256i { - simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) +pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -282,8 +242,8 @@ pub unsafe fn _mm256_maskz_madd52lo_epu64( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpmadd52huq) )] -pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - vpmadd52huq_128(a, b, c) +pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52huq_128(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -297,8 +257,8 @@ pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m1 #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - vpmadd52huq_128(a, b, c) +pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52huq_128(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -313,8 +273,8 @@ pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) +pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -329,8 +289,8 
@@ pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __ #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52huq))] -pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) +pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -347,8 +307,8 @@ pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: _ all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpmadd52luq) )] -pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - vpmadd52luq_128(a, b, c) +pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52luq_128(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -362,8 +322,8 @@ pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m1 #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - vpmadd52luq_128(a, b, c) +pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { vpmadd52luq_128(a, b, c) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -378,8 +338,8 @@ pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) +pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) } } /// Multiply packed unsigned 52-bit integers in each 64-bit element of @@ -394,8 +354,8 @@ pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __ #[target_feature(enable = "avx512ifma,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmadd52luq))] -pub unsafe fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) +pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vbmi.rs b/crates/core_arch/src/x86/avx512vbmi.rs index b9bded92d6..cd3f4ca03d 100644 --- a/crates/core_arch/src/x86/avx512vbmi.rs +++ b/crates/core_arch/src/x86/avx512vbmi.rs @@ -11,8 +11,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b -pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { - transmute(vpermi2b(a.as_i8x64(), 
idx.as_i8x64(), b.as_i8x64())) +pub fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64())) } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -22,14 +22,16 @@ pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2b))] -pub unsafe fn _mm512_mask_permutex2var_epi8( +pub fn _mm512_mask_permutex2var_epi8( a: __m512i, k: __mmask64, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); - transmute(simd_select_bitmask(k, permute, a.as_i8x64())) + unsafe { + let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); + transmute(simd_select_bitmask(k, permute, a.as_i8x64())) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -39,14 +41,16 @@ pub unsafe fn _mm512_mask_permutex2var_epi8( #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b -pub unsafe fn _mm512_maskz_permutex2var_epi8( +pub fn _mm512_maskz_permutex2var_epi8( k: __mmask64, a: __m512i, idx: __m512i, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); - transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) + unsafe { + let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); + transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -56,14 +60,16 @@ pub unsafe fn _mm512_maskz_permutex2var_epi8( #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2b))] -pub unsafe fn _mm512_mask2_permutex2var_epi8( +pub fn _mm512_mask2_permutex2var_epi8( a: __m512i, idx: __m512i, k: __mmask64, b: __m512i, ) -> __m512i { - let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); - transmute(simd_select_bitmask(k, permute, idx.as_i8x64())) + unsafe { + let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); + transmute(simd_select_bitmask(k, permute, idx.as_i8x64())) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
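The practical effect of this signature change can be shown with a minimal sketch (assumptions: a nightly toolchain with `#![feature(stdarch_x86_avx512)]`, and the helper name `select_bytes` is invented for illustration). A caller that itself enables `avx512vbmi` may now call the intrinsic without an `unsafe` block:

    use core::arch::x86_64::*;

    // Safe call: the caller enables at least the features the intrinsic requires.
    // Callers of `select_bytes` that do not enable avx512vbmi still need `unsafe`.
    #[target_feature(enable = "avx512vbmi")]
    fn select_bytes(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
        _mm512_permutex2var_epi8(a, idx, b)
    }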
@@ -73,8 +79,8 @@ pub unsafe fn _mm512_mask2_permutex2var_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b -pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { - transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) +pub fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -84,14 +90,16 @@ pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2b))] -pub unsafe fn _mm256_mask_permutex2var_epi8( +pub fn _mm256_mask_permutex2var_epi8( a: __m256i, k: __mmask32, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); - transmute(simd_select_bitmask(k, permute, a.as_i8x32())) + unsafe { + let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); + transmute(simd_select_bitmask(k, permute, a.as_i8x32())) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -101,14 +109,16 @@ pub unsafe fn _mm256_mask_permutex2var_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b -pub unsafe fn _mm256_maskz_permutex2var_epi8( +pub fn _mm256_maskz_permutex2var_epi8( k: __mmask32, a: __m256i, idx: __m256i, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); - transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) + unsafe { + let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); + transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -118,14 +128,16 @@ pub unsafe fn _mm256_maskz_permutex2var_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2b))] -pub unsafe fn _mm256_mask2_permutex2var_epi8( +pub fn _mm256_mask2_permutex2var_epi8( a: __m256i, idx: __m256i, k: __mmask32, b: __m256i, ) -> __m256i { - let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); - transmute(simd_select_bitmask(k, permute, idx.as_i8x32())) + unsafe { + let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); + transmute(simd_select_bitmask(k, permute, idx.as_i8x32())) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
@@ -135,8 +147,8 @@ pub unsafe fn _mm256_mask2_permutex2var_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b -pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { - transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) +pub fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -146,14 +158,11 @@ pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermt2b))] -pub unsafe fn _mm_mask_permutex2var_epi8( - a: __m128i, - k: __mmask16, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); - transmute(simd_select_bitmask(k, permute, a.as_i8x16())) +pub fn _mm_mask_permutex2var_epi8(a: __m128i, k: __mmask16, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); + transmute(simd_select_bitmask(k, permute, a.as_i8x16())) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -163,14 +172,11 @@ pub unsafe fn _mm_mask_permutex2var_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b -pub unsafe fn _mm_maskz_permutex2var_epi8( - k: __mmask16, - a: __m128i, - idx: __m128i, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); - transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) +pub fn _mm_maskz_permutex2var_epi8(k: __mmask16, a: __m128i, idx: __m128i, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); + transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) + } } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -180,14 +186,11 @@ pub unsafe fn _mm_maskz_permutex2var_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermi2b))] -pub unsafe fn _mm_mask2_permutex2var_epi8( - a: __m128i, - idx: __m128i, - k: __mmask16, - b: __m128i, -) -> __m128i { - let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); - transmute(simd_select_bitmask(k, permute, idx.as_i8x16())) +pub fn _mm_mask2_permutex2var_epi8(a: __m128i, idx: __m128i, k: __mmask16, b: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); + transmute(simd_select_bitmask(k, permute, idx.as_i8x16())) + } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
@@ -197,8 +200,8 @@ pub unsafe fn _mm_mask2_permutex2var_epi8( #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i { - transmute(vpermb(a.as_i8x64(), idx.as_i8x64())) +pub fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i { + unsafe { transmute(vpermb(a.as_i8x64(), idx.as_i8x64())) } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -208,14 +211,16 @@ pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm512_mask_permutexvar_epi8( +pub fn _mm512_mask_permutexvar_epi8( src: __m512i, k: __mmask64, idx: __m512i, a: __m512i, ) -> __m512i { - let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); - transmute(simd_select_bitmask(k, permute, src.as_i8x64())) + unsafe { + let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); + transmute(simd_select_bitmask(k, permute, src.as_i8x64())) + } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -225,9 +230,11 @@ pub unsafe fn _mm512_mask_permutexvar_epi8( #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i { - let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); - transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) +pub fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i { + unsafe { + let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); + transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) + } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -237,8 +244,8 @@ pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m51 #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i { - transmute(vpermb256(a.as_i8x32(), idx.as_i8x32())) +pub fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i { + unsafe { transmute(vpermb256(a.as_i8x32(), idx.as_i8x32())) } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -248,14 +255,16 @@ pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm256_mask_permutexvar_epi8( +pub fn _mm256_mask_permutexvar_epi8( src: __m256i, k: __mmask32, idx: __m256i, a: __m256i, ) -> __m256i { - let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); - transmute(simd_select_bitmask(k, permute, src.as_i8x32())) + unsafe { + let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); + transmute(simd_select_bitmask(k, permute, src.as_i8x32())) + } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -265,9 +274,11 @@ pub unsafe fn _mm256_mask_permutexvar_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i { - let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); - transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) +pub fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i { + unsafe { + let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); + transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) + } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -277,8 +288,8 @@ pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m25 #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i { - transmute(vpermb128(a.as_i8x16(), idx.as_i8x16())) +pub fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i { + unsafe { transmute(vpermb128(a.as_i8x16(), idx.as_i8x16())) } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -288,14 +299,11 @@ pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm_mask_permutexvar_epi8( - src: __m128i, - k: __mmask16, - idx: __m128i, - a: __m128i, -) -> __m128i { - let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); - transmute(simd_select_bitmask(k, permute, src.as_i8x16())) +pub fn _mm_mask_permutexvar_epi8(src: __m128i, k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); + transmute(simd_select_bitmask(k, permute, src.as_i8x16())) + } } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
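A quick usage sketch for the single-table byte permute converted above (helper name and toolchain assumptions as before; for the 32-lane form only the low 5 bits of each index byte are used):

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512vbmi,avx512vl")]
    fn permute_bytes(idx: __m256i, a: __m256i) -> __m256i {
        // Destination byte i becomes a[idx[i] & 0x1f]; no `unsafe` block is
        // needed once the intrinsic itself is a safe #[target_feature] fn.
        _mm256_permutexvar_epi8(idx, a)
    }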
@@ -305,9 +313,11 @@ pub unsafe fn _mm_mask_permutexvar_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpermb))] -pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { - let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); - transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) +pub fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { + unsafe { + let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); + transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) + } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. @@ -317,8 +327,8 @@ pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) +pub fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -328,14 +338,16 @@ pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm512_mask_multishift_epi64_epi8( +pub fn _mm512_mask_multishift_epi64_epi8( src: __m512i, k: __mmask64, a: __m512i, b: __m512i, ) -> __m512i { - let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, multishift, src.as_i8x64())) + unsafe { + let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, multishift, src.as_i8x64())) + } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -345,9 +357,11 @@ pub unsafe fn _mm512_mask_multishift_epi64_epi8( #[target_feature(enable = "avx512vbmi")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, multishift, i8x64::ZERO)) +pub fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); + transmute(simd_select_bitmask(k, multishift, i8x64::ZERO)) + } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. @@ -357,8 +371,8 @@ pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __ #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) +pub fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -368,14 +382,16 @@ pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm256_mask_multishift_epi64_epi8( +pub fn _mm256_mask_multishift_epi64_epi8( src: __m256i, k: __mmask32, a: __m256i, b: __m256i, ) -> __m256i { - let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, multishift, src.as_i8x32())) + unsafe { + let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, multishift, src.as_i8x32())) + } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -385,9 +401,11 @@ pub unsafe fn _mm256_mask_multishift_epi64_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, multishift, i8x32::ZERO)) +pub fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); + transmute(simd_select_bitmask(k, multishift, i8x32::ZERO)) + } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. @@ -397,8 +415,8 @@ pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __ #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -408,14 +426,16 @@ pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm_mask_multishift_epi64_epi8( +pub fn _mm_mask_multishift_epi64_epi8( src: __m128i, k: __mmask16, a: __m128i, b: __m128i, ) -> __m128i { - let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, multishift, src.as_i8x16())) + unsafe { + let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, multishift, src.as_i8x16())) + } } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
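For the multishift conversions above, a hedged sketch of how a caller might use the now-safe 128-bit form (the helper name is illustrative):

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512vbmi,avx512vl")]
    fn gather_bitfields(ctrl: __m128i, data: __m128i) -> __m128i {
        // Each output byte is an unaligned 8-bit field read from the matching
        // 64-bit lane of `data`, starting at the bit offset given by the
        // matching byte of `ctrl` (taken modulo 64, with wrap-around).
        _mm_multishift_epi64_epi8(ctrl, data)
    }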
@@ -425,9 +445,11 @@ pub unsafe fn _mm_mask_multishift_epi64_epi8( #[target_feature(enable = "avx512vbmi,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmultishiftqb))] -pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, multishift, i8x16::ZERO)) +pub fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); + transmute(simd_select_bitmask(k, multishift, i8x16::ZERO)) + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs index f5a9cce3e6..97c7986c17 100644 --- a/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/crates/core_arch/src/x86/avx512vbmi2.rs @@ -235,8 +235,8 @@ pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { - transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) +pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) } } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -246,8 +246,8 @@ pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i { - transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) +pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) } } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -257,8 +257,8 @@ pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { - transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) +pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) } } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. 
@@ -268,8 +268,8 @@ pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i { - transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) +pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) } } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -279,8 +279,8 @@ pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) +pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) } } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -290,8 +290,8 @@ pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) +pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) } } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -301,8 +301,8 @@ pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { - transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) +pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) } } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -312,8 +312,8 @@ pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i { - transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) +pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) } } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
@@ -323,8 +323,8 @@ pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { - transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) +pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) } } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -334,8 +334,8 @@ pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i { - transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) +pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) } } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -345,8 +345,8 @@ pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { - transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) +pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) } } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero. @@ -356,8 +356,8 @@ pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i { - transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) +pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) } } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
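The compress conversions follow the same pattern; a minimal sketch of calling the masked 16-bit variant from feature-enabled code (helper name invented):

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512vbmi2,avx512vl")]
    fn keep_selected_words(src: __m256i, keep: __mmask16, a: __m256i) -> __m256i {
        // Lanes of `a` whose bit in `keep` is set are packed towards the low end
        // of the result; the remaining lanes are taken from `src`.
        _mm256_mask_compress_epi16(src, keep, a)
    }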
@@ -367,8 +367,8 @@ pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] -pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { - transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) +pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) } } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -378,8 +378,8 @@ pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) - #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] -pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i { - transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) +pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) } } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -389,8 +389,8 @@ pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] -pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { - transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) +pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) } } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -400,8 +400,8 @@ pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) - #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] -pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i { - transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) +pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) } } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -411,8 +411,8 @@ pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] -pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) +pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) } } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -422,8 +422,8 @@ pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] -pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) +pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) } } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -433,8 +433,8 @@ pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] -pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { - transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) +pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) } } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -444,8 +444,8 @@ pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] -pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i { - transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) +pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i { + unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) } } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
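And the inverse operation, expand, under the same assumptions:

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512vbmi2,avx512vl")]
    fn scatter_to_mask_positions(keep: __mmask8, a: __m128i) -> __m128i {
        // The low 16-bit lanes of `a` are distributed to the positions whose bit
        // in `keep` is set; all other lanes are zeroed.
        _mm_maskz_expand_epi16(keep, a)
    }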
@@ -455,8 +455,8 @@ pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] -pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { - transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) +pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) } } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -466,8 +466,8 @@ pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] -pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i { - transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) +pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i { + unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) } } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -477,8 +477,8 @@ pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] -pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { - transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) +pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) } } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -488,8 +488,8 @@ pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] -pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { - transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) +pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { + unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. 
@@ -499,8 +499,8 @@ pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) +pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -510,9 +510,11 @@ pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shldv_epi64(a, b, c).as_i64x8(); - transmute(simd_select_bitmask(k, shf, a.as_i64x8())) +pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, a.as_i64x8())) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -522,9 +524,11 @@ pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shldv_epi64(a, b, c).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. @@ -534,8 +538,8 @@ pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) +pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
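For the concatenate-and-shift (funnel shift) intrinsics, a minimal sketch (helper name invented; same toolchain assumptions):

    use core::arch::x86_64::*;

    #[target_feature(enable = "avx512vbmi2")]
    fn funnel_shift_left(a: __m512i, b: __m512i, amounts: __m512i) -> __m512i {
        // Per 64-bit lane: form the 128-bit value with `a` as the upper half and
        // `b` as the lower half, shift it left by that lane's value in `amounts`,
        // and keep the upper 64 bits.
        _mm512_shldv_epi64(a, b, amounts)
    }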
@@ -545,9 +549,11 @@ pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); - transmute(simd_select_bitmask(k, shf, a.as_i64x4())) +pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, a.as_i64x4())) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -557,9 +563,11 @@ pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. @@ -569,8 +577,8 @@ pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) +pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -580,9 +588,11 @@ pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); - transmute(simd_select_bitmask(k, shf, a.as_i64x2())) +pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, a.as_i64x2())) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. 
Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -592,9 +602,11 @@ pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvq))] -pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. @@ -604,8 +616,8 @@ pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) +pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -615,9 +627,11 @@ pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); - transmute(simd_select_bitmask(k, shf, a.as_i32x16())) +pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, a.as_i32x16())) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -627,14 +641,11 @@ pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: _ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm512_maskz_shldv_epi32( - k: __mmask16, - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. @@ -644,8 +655,8 @@ pub unsafe fn _mm512_maskz_shldv_epi32( #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) +pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -655,9 +666,11 @@ pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); - transmute(simd_select_bitmask(k, shf, a.as_i32x8())) +pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, a.as_i32x8())) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -667,9 +680,11 @@ pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. 
Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. @@ -679,8 +694,8 @@ pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) +pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -690,9 +705,11 @@ pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); - transmute(simd_select_bitmask(k, shf, a.as_i32x4())) +pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, a.as_i32x4())) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -702,9 +719,11 @@ pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvd))] -pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. @@ -714,8 +733,8 @@ pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) +pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. 
Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -725,9 +744,11 @@ pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); - transmute(simd_select_bitmask(k, shf, a.as_i16x32())) +pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, a.as_i16x32())) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -737,14 +758,11 @@ pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: _ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm512_maskz_shldv_epi16( - k: __mmask32, - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. @@ -754,8 +772,8 @@ pub unsafe fn _mm512_maskz_shldv_epi16( #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) +pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
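For orientation only (a sketch, not code from this crate): the `simd_select_bitmask` calls in these mask/maskz variants reduce, per lane, to a bit test on `k`. The helper names below are hypothetical.

// Writemask: lanes whose mask bit is clear keep the fallback value.
fn mask_select(k: u8, lane: usize, computed: i16, fallback: i16) -> i16 {
    if (k >> lane) & 1 == 1 { computed } else { fallback }
}

// Zeromask: lanes whose mask bit is clear are zeroed instead.
fn maskz_select(k: u8, lane: usize, computed: i16) -> i16 {
    mask_select(k, lane, computed, 0)
}

fn main() {
    assert_eq!(mask_select(0b0000_0010, 1, 7, -1), 7);  // bit 1 set: take the shift result
    assert_eq!(mask_select(0b0000_0010, 0, 7, -1), -1); // bit 0 clear: keep the fallback
    assert_eq!(maskz_select(0b0000_0010, 0, 7), 0);     // bit 0 clear: zero the lane
}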
@@ -765,9 +783,11 @@ pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); - transmute(simd_select_bitmask(k, shf, a.as_i16x16())) +pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, a.as_i16x16())) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -777,14 +797,11 @@ pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm256_maskz_shldv_epi16( - k: __mmask16, - a: __m256i, - b: __m256i, - c: __m256i, -) -> __m256i { - let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. @@ -794,8 +811,8 @@ pub unsafe fn _mm256_maskz_shldv_epi16( #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) +pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -805,9 +822,11 @@ pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); - transmute(simd_select_bitmask(k, shf, a.as_i16x8())) +pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, a.as_i16x8())) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. 
Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -817,9 +836,11 @@ pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldvw))] -pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. @@ -829,8 +850,8 @@ pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) +pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -840,9 +861,11 @@ pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); - transmute(simd_select_bitmask(k, shf, a.as_i64x8())) +pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, a.as_i64x8())) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
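Another aside (an assumed scalar model, not part of the diff): the right funnel shift mirrors the left one, but the second operand supplies the high half and the low bits of the widened result are kept. For the 64-bit shrdv_epi64 family this can be sketched with u128 arithmetic; the helper name is hypothetical and counts are assumed to be below the lane width.

// Scalar model of one 64-bit lane: concatenate `b` (high) and `a` (low),
// shift right by `c`, keep the lower 64 bits.
fn shrdv64_lane(a: u64, b: u64, c: u32) -> u64 {
    assert!(c < 64, "sketch only covers counts below the lane width");
    let concat = ((b as u128) << 64) | (a as u128);
    (concat >> c) as u64
}

fn main() {
    // Shifting right by 8 pulls the low byte of `b` in above the high bits of `a`.
    assert_eq!(shrdv64_lane(0, 0xEF, 8), 0xEF00_0000_0000_0000);
}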
@@ -852,9 +875,11 @@ pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. @@ -864,8 +889,8 @@ pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) +pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -875,9 +900,11 @@ pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); - transmute(simd_select_bitmask(k, shf, a.as_i64x4())) +pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, a.as_i64x4())) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -887,9 +914,11 @@ pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. 
Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. @@ -899,8 +928,8 @@ pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) +pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -910,9 +939,11 @@ pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); - transmute(simd_select_bitmask(k, shf, a.as_i64x2())) +pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, a.as_i64x2())) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -922,9 +953,11 @@ pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvq))] -pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. @@ -934,8 +967,8 @@ pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) +pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. 
Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -945,9 +978,11 @@ pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); - transmute(simd_select_bitmask(k, shf, a.as_i32x16())) +pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, a.as_i32x16())) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -957,14 +992,11 @@ pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: _ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm512_maskz_shrdv_epi32( - k: __mmask16, - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. @@ -974,8 +1006,8 @@ pub unsafe fn _mm512_maskz_shrdv_epi32( #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) +pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
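A small property worth noting (derived from the scalar models sketched above, not stated in the diff): the left and right funnel shifts are duals. For counts strictly between 0 and the lane width, shifting right by s equals shifting left by width - s with the operand roles swapped. A quick check on 32-bit lanes, with hypothetical helper names:

fn shld32(a: u32, b: u32, c: u32) -> u32 {
    let concat = ((a as u64) << 32) | (b as u64);
    ((concat << c) >> 32) as u32
}

fn shrd32(a: u32, b: u32, c: u32) -> u32 {
    let concat = ((b as u64) << 32) | (a as u64);
    (concat >> c) as u32
}

fn main() {
    let (a, b) = (0x1234_5678u32, 0x9ABC_DEF0u32);
    for s in 1..32 {
        assert_eq!(shrd32(a, b, s), shld32(b, a, 32 - s));
    }
}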
@@ -985,9 +1017,11 @@ pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); - transmute(simd_select_bitmask(k, shf, a.as_i32x8())) +pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, a.as_i32x8())) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -997,9 +1031,11 @@ pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. @@ -1009,8 +1045,8 @@ pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) +pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1020,9 +1056,11 @@ pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); - transmute(simd_select_bitmask(k, shf, a.as_i32x4())) +pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, a.as_i32x4())) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. 
Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1032,9 +1070,11 @@ pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvd))] -pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. @@ -1044,8 +1084,8 @@ pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) +pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1055,9 +1095,11 @@ pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { - let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); - transmute(simd_select_bitmask(k, shf, a.as_i16x32())) +pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, a.as_i16x32())) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
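To make the lane/mask interplay concrete (a hypothetical reference, shown on eight 16-bit lanes for brevity; the 512-bit form described just above does the same over 32 lanes with a 32-bit mask):

// Zero-masked 16-bit right funnel shift over eight lanes.
// Counts are assumed to be below the lane width.
fn shrdv_epi16_maskz_ref(k: u8, a: [u16; 8], b: [u16; 8], c: [u16; 8]) -> [u16; 8] {
    let mut out = [0u16; 8];
    for i in 0..8 {
        if (k >> i) & 1 == 1 {
            let concat = ((b[i] as u32) << 16) | (a[i] as u32);
            out[i] = (concat >> c[i]) as u16;
        } // else: the lane stays zero
    }
    out
}

fn main() {
    let r = shrdv_epi16_maskz_ref(0b0000_0001, [0xBBAA; 8], [0xDDCC; 8], [8; 8]);
    assert_eq!(r[0], 0xCCBB); // low byte of b stitched above the high byte of a
    assert_eq!(r[1], 0);      // masked-off lane is zeroed
}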
@@ -1067,14 +1109,11 @@ pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: _ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm512_maskz_shrdv_epi16( - k: __mmask32, - a: __m512i, - b: __m512i, - c: __m512i, -) -> __m512i { - let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i { + unsafe { + let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. @@ -1084,8 +1123,8 @@ pub unsafe fn _mm512_maskz_shrdv_epi16( #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) +pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1095,9 +1134,11 @@ pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { - let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); - transmute(simd_select_bitmask(k, shf, a.as_i16x16())) +pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, a.as_i16x16())) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1107,14 +1148,11 @@ pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: _ #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm256_maskz_shrdv_epi16( - k: __mmask16, - a: __m256i, - b: __m256i, - c: __m256i, -) -> __m256i { - let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i { + unsafe { + let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. 
Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. @@ -1124,8 +1162,8 @@ pub unsafe fn _mm256_maskz_shrdv_epi16( #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) +pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1135,9 +1173,11 @@ pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); - transmute(simd_select_bitmask(k, shf, a.as_i16x8())) +pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, a.as_i16x8())) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1147,9 +1187,11 @@ pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshrdvw))] -pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { - let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { + unsafe { + let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). 
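One more aside before the immediate (shldi) forms below (a scalar sketch, not from the patch): when both inputs are the same value, a constant left funnel shift degenerates into a rotate, which is a common way to build rotates on lane widths that lack a dedicated one. Checked here on a 16-bit scalar model; the helper name is hypothetical.

// Scalar model of a constant 16-bit left funnel shift.
fn shldi16(a: u16, b: u16, s: u32) -> u16 {
    let concat = ((a as u32) << 16) | (b as u32);
    ((concat << s) >> 16) as u16
}

fn main() {
    let x = 0xA5C3u16;
    for s in 0..16 {
        // Funnel-shifting a value with itself is a rotate.
        assert_eq!(shldi16(x, x, s), x.rotate_left(s));
    }
}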
@@ -1160,7 +1202,7 @@ pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64)) } @@ -1173,15 +1215,17 @@ pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shldi_epi64( +pub fn _mm512_mask_shldi_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1192,14 +1236,12 @@ pub unsafe fn _mm512_mask_shldi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shldi_epi64( - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). @@ -1210,7 +1252,7 @@ pub unsafe fn _mm512_maskz_shldi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64)) } @@ -1223,15 +1265,17 @@ pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shldi_epi64( +pub fn _mm256_mask_shldi_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. 
Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1242,14 +1286,12 @@ pub unsafe fn _mm256_mask_shldi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shldi_epi64( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). @@ -1260,7 +1302,7 @@ pub unsafe fn _mm256_maskz_shldi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64)) } @@ -1273,15 +1315,17 @@ pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shldi_epi64( +pub fn _mm_mask_shldi_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shldi_epi64::(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1292,14 +1336,12 @@ pub unsafe fn _mm_mask_shldi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shldi_epi64( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shldi_epi64::(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. 
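A usage note (sketch; the surrounding attributes are assumptions drawn from the declarations above, and a nightly toolchain is needed while these intrinsics are gated behind `stdarch_x86_avx512`): the shift count of the immediate forms is a const generic, passed with turbofish syntax, and `static_assert_uimm_bits!(IMM8, 8)` rejects anything that does not fit in eight bits at compile time.

#![feature(stdarch_x86_avx512)] // the intrinsics above are unstable (issue #111137)
use core::arch::x86_64::*;

#[target_feature(enable = "avx512vbmi2,avx512vl")]
fn shldi_demo(a: __m128i, b: __m128i) -> __m128i {
    // Per 64-bit lane: the upper 64 bits of (a:b) shifted left by 5.
    // Something like ::<256> would fail the uimm8 assertion at compile time.
    _mm_shldi_epi64::<5>(a, b)
}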
@@ -1310,7 +1352,7 @@ pub unsafe fn _mm_maskz_shldi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8)) } @@ -1323,15 +1365,17 @@ pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shldi_epi32( +pub fn _mm512_mask_shldi_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1342,14 +1386,12 @@ pub unsafe fn _mm512_mask_shldi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shldi_epi32( - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. @@ -1360,7 +1402,7 @@ pub unsafe fn _mm512_maskz_shldi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8)) } @@ -1373,15 +1415,17 @@ pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shldi_epi32( +pub fn _mm256_mask_shldi_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. 
Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1392,14 +1436,12 @@ pub unsafe fn _mm256_mask_shldi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shldi_epi32( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. @@ -1410,7 +1452,7 @@ pub unsafe fn _mm256_maskz_shldi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8)) } @@ -1423,15 +1465,17 @@ pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shldi_epi32( +pub fn _mm_mask_shldi_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shldi_epi32::(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1442,14 +1486,12 @@ pub unsafe fn _mm_mask_shldi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shldi_epi32( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shldi_epi32::(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). 
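Worth pointing out (an observation from the hunks above, illustrated with a hypothetical scalar reference): in the variable-count mask forms earlier the un-selected lanes fall back to `a` itself, while the immediate mask forms take a separate `src` operand as the fallback. A 4-lane sketch of the merge-masked, constant-count 32-bit shift; counts are assumed to be below the lane width.

fn mask_shldi_epi32_ref<const IMM8: u32>(
    src: [u32; 4],
    k: u8,
    a: [u32; 4],
    b: [u32; 4],
) -> [u32; 4] {
    let mut out = src; // lanes with a clear mask bit keep `src`, not `a`
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            let concat = ((a[i] as u64) << 32) | (b[i] as u64);
            out[i] = ((concat << IMM8) >> 32) as u32;
        }
    }
    out
}

fn main() {
    let r = mask_shldi_epi32_ref::<4>([9; 4], 0b0001, [1; 4], [0xF000_0000; 4]);
    assert_eq!(r, [0x1F, 9, 9, 9]);
}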
@@ -1460,7 +1502,7 @@ pub unsafe fn _mm_maskz_shldi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) } @@ -1473,15 +1515,17 @@ pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shldi_epi16( +pub fn _mm512_mask_shldi_epi16( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1492,14 +1536,12 @@ pub unsafe fn _mm512_mask_shldi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shldi_epi16( - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). @@ -1510,7 +1552,7 @@ pub unsafe fn _mm512_maskz_shldi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) } @@ -1523,15 +1565,17 @@ pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shldi_epi16( +pub fn _mm256_mask_shldi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. 
Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1542,14 +1586,12 @@ pub unsafe fn _mm256_mask_shldi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shldi_epi16( - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). @@ -1560,7 +1602,7 @@ pub unsafe fn _mm256_maskz_shldi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16)) } @@ -1573,15 +1615,17 @@ pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shldi_epi16( +pub fn _mm_mask_shldi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shldi_epi16::(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1592,14 +1636,12 @@ pub unsafe fn _mm_mask_shldi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shldi_epi16( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shldi_epi16::(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shldi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. 
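As a motivating example for the immediate right shifts that follow (a scalar sketch under the semantics quoted above, not code from the crate): a right funnel shift with a constant count is the classic primitive for reading a value that straddles two adjacent words. The helper name is hypothetical.

// Extract the 64-bit window starting `offset` bits into the pair (lo, hi),
// where `hi` is the more-significant word: concat hi:lo, shift right, keep
// the low 64 bits -- the same shape as the per-lane operation above, with
// `a` supplying the low word and `b` the high word.
fn window64(lo: u64, hi: u64, offset: u32) -> u64 {
    assert!(offset < 64);
    let concat = ((hi as u128) << 64) | (lo as u128);
    (concat >> offset) as u64
}

fn main() {
    let lo = 0xFFFF_FFFF_FFFF_FF00u64;
    let hi = 0x0000_0000_0000_00ABu64;
    // A window starting 8 bits in stitches the low byte of `hi` onto the top of `lo`.
    assert_eq!(window64(lo, hi, 8), 0xABFF_FFFF_FFFF_FFFF);
}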
@@ -1610,7 +1652,7 @@ pub unsafe fn _mm_maskz_shldi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64)) } @@ -1623,15 +1665,17 @@ pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shrdi_epi64( +pub fn _mm512_mask_shrdi_epi64( src: __m512i, k: __mmask8, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1642,14 +1686,12 @@ pub unsafe fn _mm512_mask_shrdi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shrdi_epi64( - k: __mmask8, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) +pub fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. @@ -1660,7 +1702,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64)) } @@ -1673,15 +1715,17 @@ pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shrdi_epi64( +pub fn _mm256_mask_shrdi_epi64( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, src.as_i64x4())) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. 
Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1692,14 +1736,12 @@ pub unsafe fn _mm256_mask_shrdi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shrdi_epi64( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); - transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) +pub fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. @@ -1710,7 +1752,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64)) } @@ -1723,15 +1765,17 @@ pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shrdi_epi64( +pub fn _mm_mask_shrdi_epi64( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, src.as_i64x2())) + } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1742,14 +1786,12 @@ pub unsafe fn _mm_mask_shrdi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shrdi_epi64( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); - transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) +pub fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. 
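The `_mm*_shrdi_*` intrinsics (the 64-bit forms above, with 32- and 16-bit forms following) are the mirrored right funnel shift: `b` is concatenated above `a`, the double-width value is shifted right by `imm8`, and the lower half is kept. A scalar sketch of one 64-bit lane (illustrative helper, not part of this patch; shift count modulo 64 per Intel's pseudocode):

    // One lane of vpshrdq: concat(b, a) >> (imm8 % 64), keep the lower half.
    fn shrd64_lane(a: u64, b: u64, imm8: u8) -> u64 {
        let concat = ((b as u128) << 64) | (a as u128);
        (concat >> (imm8 & 63)) as u64
    }
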
@@ -1760,7 +1802,7 @@ pub unsafe fn _mm_maskz_shrdi_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8)) } @@ -1773,15 +1815,17 @@ pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shrdi_epi32( +pub fn _mm512_mask_shrdi_epi32( src: __m512i, k: __mmask16, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1792,14 +1836,12 @@ pub unsafe fn _mm512_mask_shrdi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shrdi_epi32( - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) +pub fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. @@ -1810,7 +1852,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8)) } @@ -1823,15 +1865,17 @@ pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shrdi_epi32( +pub fn _mm256_mask_shrdi_epi32( src: __m256i, k: __mmask8, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, src.as_i32x8())) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. 
Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1842,14 +1886,12 @@ pub unsafe fn _mm256_mask_shrdi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shrdi_epi32( - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); - transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) +pub fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. @@ -1860,7 +1902,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8)) } @@ -1873,15 +1915,17 @@ pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shrdi_epi32( +pub fn _mm_mask_shrdi_epi32( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, src.as_i32x4())) + } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1892,14 +1936,12 @@ pub unsafe fn _mm_mask_shrdi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shrdi_epi32( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); - transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) +pub fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. 
@@ -1910,7 +1952,7 @@ pub unsafe fn _mm_maskz_shrdi_epi32( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m512i { +pub fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16)) } @@ -1923,15 +1965,17 @@ pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm512_mask_shrdi_epi16( +pub fn _mm512_mask_shrdi_epi16( src: __m512i, k: __mmask32, a: __m512i, b: __m512i, ) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, src.as_i16x32())) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1942,14 +1986,12 @@ pub unsafe fn _mm512_mask_shrdi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_maskz_shrdi_epi16( - k: __mmask32, - a: __m512i, - b: __m512i, -) -> __m512i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) +pub fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. @@ -1960,7 +2002,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m256i { +pub fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16)) } @@ -1973,15 +2015,17 @@ pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm256_mask_shrdi_epi16( +pub fn _mm256_mask_shrdi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, ) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, src.as_i16x16())) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. 
Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -1992,14 +2036,12 @@ pub unsafe fn _mm256_mask_shrdi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_maskz_shrdi_epi16( - k: __mmask16, - a: __m256i, - b: __m256i, -) -> __m256i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) +pub fn _mm256_maskz_shrdi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. @@ -2010,7 +2052,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_shrdi_epi16(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16)) } @@ -2023,15 +2065,17 @@ pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(4)] -pub unsafe fn _mm_mask_shrdi_epi16( +pub fn _mm_mask_shrdi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, src.as_i16x8())) + } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
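All of the `_mask_`/`_maskz_` variants in this patch end in the same per-lane selection, visible here as the `simd_select_bitmask` calls: bit `i` of `k` picks the freshly computed lane, otherwise the lane falls back to `src` (writemask) or to zero (zeromask). A scalar sketch for eight 16-bit lanes (array-based model, illustrative only):

    // Writemask: keep computed[i] where bit i of k is set, else fall back to fallback[i].
    // The zeromask variants pass an all-zero fallback instead of src.
    fn select_bitmask8x16(k: u8, computed: [i16; 8], fallback: [i16; 8]) -> [i16; 8] {
        let mut out = [0i16; 8];
        for i in 0..8 {
            out[i] = if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] };
        }
        out
    }
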
@@ -2042,14 +2086,12 @@ pub unsafe fn _mm_mask_shrdi_epi16( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm_maskz_shrdi_epi16( - k: __mmask8, - a: __m128i, - b: __m128i, -) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) +pub fn _mm_maskz_shrdi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vnni.rs b/crates/core_arch/src/x86/avx512vnni.rs index 1e1639b700..d7cd0838c2 100644 --- a/crates/core_arch/src/x86/avx512vnni.rs +++ b/crates/core_arch/src/x86/avx512vnni.rs @@ -11,8 +11,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { - transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22,14 +22,11 @@ pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51 #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm512_mask_dpwssd_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
@@ -39,14 +36,11 @@ pub unsafe fn _mm512_mask_dpwssd_epi32( #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm512_maskz_dpwssd_epi32( - k: __mmask16, - src: __m512i, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -59,8 +53,8 @@ pub unsafe fn _mm512_maskz_dpwssd_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpwssd) )] -pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -70,8 +64,8 @@ pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -81,14 +75,11 @@ pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm256_mask_dpwssd_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. 
Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -98,14 +89,11 @@ pub unsafe fn _mm256_mask_dpwssd_epi32( #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm256_maskz_dpwssd_epi32( - k: __mmask8, - src: __m256i, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -118,8 +106,8 @@ pub unsafe fn _mm256_maskz_dpwssd_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpwssd) )] -pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -129,8 +117,8 @@ pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
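The `vpdpwssd` doc comments above reduce each 32-bit lane to a multiply-accumulate over the two signed 16-bit halves of `a` and `b`. A scalar sketch of one lane (illustrative helper, not part of this patch; the non-saturating form wraps on overflow):

    // One lane of vpdpwssd: src + a.lo*b.lo + a.hi*b.hi, all signed.
    fn dpwssd_lane(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
        src.wrapping_add((a[0] as i32) * (b[0] as i32))
            .wrapping_add((a[1] as i32) * (b[1] as i32))
    }
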
@@ -140,9 +128,11 @@ pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -152,9 +142,11 @@ pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssd))] -pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { - let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -164,8 +156,8 @@ pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _ #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { - transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -175,14 +167,11 @@ pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5 #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm512_mask_dpwssds_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -192,14 +181,11 @@ pub unsafe fn _mm512_mask_dpwssds_epi32( #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm512_maskz_dpwssds_epi32( - k: __mmask16, - src: __m512i, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -212,8 +198,8 @@ pub unsafe fn _mm512_maskz_dpwssds_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpwssds) )] -pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -223,8 +209,8 @@ pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. 
Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -234,14 +220,11 @@ pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm256_mask_dpwssds_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -251,14 +234,11 @@ pub unsafe fn _mm256_mask_dpwssds_epi32( #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm256_maskz_dpwssds_epi32( - k: __mmask8, - src: __m256i, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -271,8 +251,8 @@ pub unsafe fn _mm256_maskz_dpwssds_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpwssds) )] -pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
@@ -282,8 +262,8 @@ pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -293,9 +273,11 @@ pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -305,14 +287,11 @@ pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _ #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpwssds))] -pub unsafe fn _mm_maskz_dpwssds_epi32( - k: __mmask8, - src: __m128i, - a: __m128i, - b: __m128i, -) -> __m128i { - let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
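The `vpdpwssds` variants covered above differ from `vpdpwssd` only in the final accumulation, which saturates to the signed 32-bit range instead of wrapping. A scalar sketch of one lane, computed in 64-bit to make the saturation explicit (illustrative only):

    // One lane of vpdpwssds: saturate(src + a.lo*b.lo + a.hi*b.hi) to i32.
    fn dpwssds_lane(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
        let sum = src as i64
            + (a[0] as i64) * (b[0] as i64)
            + (a[1] as i64) * (b[1] as i64);
        sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
    }
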
@@ -322,8 +301,8 @@ pub unsafe fn _mm_maskz_dpwssds_epi32( #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { - transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -333,14 +312,11 @@ pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51 #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm512_mask_dpbusd_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -350,14 +326,11 @@ pub unsafe fn _mm512_mask_dpbusd_epi32( #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm512_maskz_dpbusd_epi32( - k: __mmask16, - src: __m512i, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -370,8 +343,8 @@ pub unsafe fn _mm512_maskz_dpbusd_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpbusd) )] -pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. 
Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -381,8 +354,8 @@ pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> _ #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -392,14 +365,11 @@ pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm256_mask_dpbusd_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -409,14 +379,11 @@ pub unsafe fn _mm256_mask_dpbusd_epi32( #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm256_maskz_dpbusd_epi32( - k: __mmask8, - src: __m256i, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. 
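The `vpdpbusd` intrinsics above perform the analogous reduction over bytes: each 32-bit lane accumulates four products of an unsigned byte from `a` with a signed byte from `b`, plus `src`. A scalar sketch of one lane (illustrative helper; the non-saturating form wraps):

    // One lane of vpdpbusd: src + sum of a[i] (u8) * b[i] (i8) for i = 0..4.
    fn dpbusd_lane(src: i32, a: [u8; 4], b: [i8; 4]) -> i32 {
        let mut acc = src;
        for i in 0..4 {
            acc = acc.wrapping_add(a[i] as i32 * b[i] as i32);
        }
        acc
    }
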
@@ -429,8 +396,8 @@ pub unsafe fn _mm256_maskz_dpbusd_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpbusd) )] -pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -440,8 +407,8 @@ pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m1 #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -451,9 +418,11 @@ pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -463,9 +432,11 @@ pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusd))] -pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { - let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. 
Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -475,8 +446,8 @@ pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _ #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { - transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) +pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -486,14 +457,11 @@ pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5 #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm512_mask_dpbusds_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, src.as_i32x16())) +pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, src.as_i32x16())) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -503,14 +471,11 @@ pub unsafe fn _mm512_mask_dpbusds_epi32( #[target_feature(enable = "avx512vnni")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm512_maskz_dpbusds_epi32( - k: __mmask16, - src: __m512i, - a: __m512i, - b: __m512i, -) -> __m512i { - let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); - transmute(simd_select_bitmask(k, r, i32x16::ZERO)) +pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i { + unsafe { + let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
@@ -523,8 +488,8 @@ pub unsafe fn _mm512_maskz_dpbusds_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpbusds) )] -pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -534,8 +499,8 @@ pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -545,14 +510,11 @@ pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm256_mask_dpbusds_epi32( - src: __m256i, - k: __mmask8, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, src.as_i32x8())) +pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, src.as_i32x8())) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
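As with the word forms, the `vpdpbusds` variants above change only the final step: the four unsigned-by-signed byte products and `src` are summed and the result is saturated to the signed 32-bit range. A scalar sketch of one lane (illustrative only, not part of this patch):

    // One lane of vpdpbusds: saturate(src + sum of the four u8*i8 products) to i32.
    fn dpbusds_lane(src: i32, a: [u8; 4], b: [i8; 4]) -> i32 {
        let mut sum = src as i64;
        for i in 0..4 {
            sum += a[i] as i64 * b[i] as i64;
        }
        sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
    }
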
@@ -562,14 +524,11 @@ pub unsafe fn _mm256_mask_dpbusds_epi32( #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm256_maskz_dpbusds_epi32( - k: __mmask8, - src: __m256i, - a: __m256i, - b: __m256i, -) -> __m256i { - let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); - transmute(simd_select_bitmask(k, r, i32x8::ZERO)) +pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { + let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -582,8 +541,8 @@ pub unsafe fn _mm256_maskz_dpbusds_epi32( all(test, any(target_os = "linux", target_env = "msvc")), assert_instr(vpdpbusds) )] -pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -593,8 +552,8 @@ pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -604,9 +563,11 @@ pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { - let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, src.as_i32x4())) +pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, src.as_i32x4())) + } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. 
Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). @@ -616,14 +577,11 @@ pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _ #[target_feature(enable = "avx512vnni,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpdpbusds))] -pub unsafe fn _mm_maskz_dpbusds_epi32( - k: __mmask8, - src: __m128i, - a: __m128i, - b: __m128i, -) -> __m128i { - let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); - transmute(simd_select_bitmask(k, r, i32x4::ZERO)) +pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { + let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) + } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit @@ -638,8 +596,8 @@ pub unsafe fn _mm_maskz_dpbusds_epi32( assert_instr(vpdpbssd) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit @@ -654,8 +612,8 @@ pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpbssd) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit @@ -670,8 +628,8 @@ pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 assert_instr(vpdpbssds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit @@ -686,8 +644,8 @@ pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpbssds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit @@ -702,8 +660,8 @@ pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 assert_instr(vpdpbsud) )] #[unstable(feature = 
"stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit @@ -718,8 +676,8 @@ pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpbsud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit @@ -734,8 +692,8 @@ pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 assert_instr(vpdpbsuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit @@ -750,8 +708,8 @@ pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpbsuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit @@ -766,8 +724,8 @@ pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 assert_instr(vpdpbuud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit @@ -782,8 +740,8 @@ pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpbuud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit @@ -798,8 +756,8 @@ pub unsafe fn 
_mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 assert_instr(vpdpbuuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit @@ -814,8 +772,8 @@ pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpbuuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit @@ -830,8 +788,8 @@ pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 assert_instr(vpdpwsud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit @@ -846,8 +804,8 @@ pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpwsud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit @@ -862,8 +820,8 @@ pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 assert_instr(vpdpwsuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit @@ -878,8 +836,8 @@ pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpwsuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply 
groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit @@ -894,8 +852,8 @@ pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 assert_instr(vpdpwusd) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit @@ -910,8 +868,8 @@ pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpwusd) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit @@ -926,8 +884,8 @@ pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 assert_instr(vpdpwusds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit @@ -942,8 +900,8 @@ pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpwusds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit @@ -958,8 +916,8 @@ pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2 assert_instr(vpdpwuud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit @@ -974,8 +932,8 @@ pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpwuud) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: 
__m256i) -> __m256i { + unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit @@ -990,8 +948,8 @@ pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25 assert_instr(vpdpwuuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { - transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) +pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) } } /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit @@ -1006,8 +964,8 @@ pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i assert_instr(vpdpwuuds) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { - transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) +pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i { + unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vpopcntdq.rs b/crates/core_arch/src/x86/avx512vpopcntdq.rs index 0bc343acae..7a06f09b19 100644 --- a/crates/core_arch/src/x86/avx512vpopcntdq.rs +++ b/crates/core_arch/src/x86/avx512vpopcntdq.rs @@ -26,8 +26,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512vpopcntdq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { - transmute(simd_ctpop(a.as_i32x16())) +pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i32x16())) } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -40,12 +40,14 @@ pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { #[target_feature(enable = "avx512vpopcntdq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i32x16()), - i32x16::ZERO, - )) +pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x16()), + i32x16::ZERO, + )) + } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -58,12 +60,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vpopcntdq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i32x16()), - src.as_i32x16(), - )) +pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x16()), + src.as_i32x16(), + )) + } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. 
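As a reference for the `vpopcntd` hunks around here: the per-lane semantics are just `count_ones` applied to each 32-bit element, which is what the `simd_ctpop` bodies model. A standalone scalar sketch, not part of the patch:

```rust
// Lane-wise model of `_mm512_popcnt_epi32` (illustration only).
fn popcnt_epi32_model(a: [u32; 16]) -> [u32; 16] {
    a.map(|lane| lane.count_ones())
}

fn main() {
    // Lane i holds a value with exactly i set bits: 0, 1, 3, 7, 15, ...
    let a: [u32; 16] = core::array::from_fn(|i| (1u32 << i) - 1);
    let expected: [u32; 16] = core::array::from_fn(|i| i as u32);
    assert_eq!(popcnt_epi32_model(a), expected);
}
```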
@@ -73,8 +77,8 @@ pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) - #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { - transmute(simd_ctpop(a.as_i32x8())) +pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i32x8())) } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -87,12 +91,14 @@ pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i32x8()), - i32x8::ZERO, - )) +pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x8()), + i32x8::ZERO, + )) + } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -105,12 +111,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i32x8()), - src.as_i32x8(), - )) +pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x8()), + src.as_i32x8(), + )) + } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -120,8 +128,8 @@ pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i { - transmute(simd_ctpop(a.as_i32x4())) +pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i32x4())) } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -134,12 +142,14 @@ pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i32x4()), - i32x4::ZERO, - )) +pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x4()), + i32x4::ZERO, + )) + } } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. 
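Since these popcount intrinsics now have safe signatures, a hypothetical caller could look like the sketch below. Names are mine, it is nightly-only because the intrinsics remain gated behind `stdarch_x86_avx512`, and it assumes a toolchain where safe `#[target_feature]` functions are callable from matching contexts, which is the premise of this patch.

```rust
#![feature(stdarch_x86_avx512)] // the vpopcntdq intrinsics are still unstable

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

// With the signatures from this patch, the value-based popcount intrinsic is
// a safe call from a function enabling the same target features; only the
// pointer-based load/store intrinsics still need `unsafe`.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
fn popcount_lanes(x: [i32; 8]) -> [i32; 8] {
    let v = unsafe { _mm256_loadu_si256(x.as_ptr().cast()) };
    let counts = _mm256_popcnt_epi32(v); // safe: matching target features
    let mut out = [0i32; 8];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr().cast(), counts) };
    out
}

#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("avx512vpopcntdq") && is_x86_feature_detected!("avx512vl") {
        // SAFETY: the required CPU features were detected at runtime.
        assert_eq!(unsafe { popcount_lanes([0b1011; 8]) }, [3; 8]);
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}
```

The remaining `unsafe` blocks cover the pointer-based load/store intrinsics, which this patch leaves `unsafe`, and the dispatch into the feature-gated wrapper from ordinary code after runtime detection.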
@@ -152,12 +162,14 @@ pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] -pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i32x4()), - src.as_i32x4(), - )) +pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x4()), + src.as_i32x4(), + )) + } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -167,8 +179,8 @@ pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __ #[target_feature(enable = "avx512vpopcntdq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { - transmute(simd_ctpop(a.as_i64x8())) +pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { + unsafe { transmute(simd_ctpop(a.as_i64x8())) } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -181,12 +193,14 @@ pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { #[target_feature(enable = "avx512vpopcntdq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i64x8()), - i64x8::ZERO, - )) +pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x8()), + i64x8::ZERO, + )) + } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -199,12 +213,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { #[target_feature(enable = "avx512vpopcntdq")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i64x8()), - src.as_i64x8(), - )) +pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x8()), + src.as_i64x8(), + )) + } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -214,8 +230,8 @@ pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { - transmute(simd_ctpop(a.as_i64x4())) +pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { + unsafe { transmute(simd_ctpop(a.as_i64x4())) } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. 
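The `mask`/`maskz` pairs in these `vpopcntq` hunks differ only in what a cleared mask bit yields: the corresponding lane of `src`, or zero. A scalar sketch of that selection, with illustrative names that are not part of the patch:

```rust
// Writemask variant: lanes with a cleared bit in `k` are copied from `src`.
fn mask_popcnt_epi64_model(src: [i64; 8], k: u8, a: [u64; 8]) -> [i64; 8] {
    core::array::from_fn(|i| {
        if (k >> i) & 1 == 1 {
            a[i].count_ones() as i64
        } else {
            src[i]
        }
    })
}

// Zeromask variant: lanes with a cleared bit in `k` are zeroed.
fn maskz_popcnt_epi64_model(k: u8, a: [u64; 8]) -> [i64; 8] {
    core::array::from_fn(|i| {
        if (k >> i) & 1 == 1 {
            a[i].count_ones() as i64
        } else {
            0
        }
    })
}

fn main() {
    let a = [u64::MAX; 8]; // every lane has 64 set bits
    let src = [7i64; 8];
    assert_eq!(
        mask_popcnt_epi64_model(src, 0b0000_0101, a),
        [64, 7, 64, 7, 7, 7, 7, 7]
    );
    assert_eq!(
        maskz_popcnt_epi64_model(0b0000_0101, a),
        [64, 0, 64, 0, 0, 0, 0, 0]
    );
}
```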
@@ -228,12 +244,14 @@ pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i64x4()), - i64x4::ZERO, - )) +pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x4()), + i64x4::ZERO, + )) + } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -246,12 +264,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i64x4()), - src.as_i64x4(), - )) +pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x4()), + src.as_i64x4(), + )) + } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -261,8 +281,8 @@ pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i { - transmute(simd_ctpop(a.as_i64x2())) +pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i { + unsafe { transmute(simd_ctpop(a.as_i64x2())) } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -275,12 +295,14 @@ pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i64x2()), - i64x2::ZERO, - )) +pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x2()), + i64x2::ZERO, + )) + } } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. 
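The same mechanical change repeats through every one of these hunks, so here it is once in miniature on a made-up function (none of these names exist in the crate): the public signature drops `unsafe`, and the call to the raw binding gets an `unsafe { .. }` block scoped to just that operation.

```rust
// Stand-in for an `unsafe extern` intrinsic binding (made-up, illustration only).
unsafe fn raw_binding(x: u64) -> u64 {
    x.count_ones() as u64
}

// Before (shape of the removed lines):
//     pub unsafe fn intrinsic(x: u64) -> u64 {
//         raw_binding(x)
//     }
//
// After (shape of the added lines): safe signature, explicitly scoped unsafety.
// In the real crate, a `#[target_feature(enable = ...)]` attribute is what
// continues to gate whether the call is sound.
pub fn intrinsic(x: u64) -> u64 {
    unsafe { raw_binding(x) }
}

fn main() {
    assert_eq!(intrinsic(0b1010), 2);
}
```

Nothing about the generated code changes; only the point where the `unsafe` obligation is discharged does.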
@@ -293,12 +315,14 @@ pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { #[target_feature(enable = "avx512vpopcntdq,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] -pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { - transmute(simd_select_bitmask( - k, - simd_ctpop(a.as_i64x2()), - src.as_i64x2(), - )) +pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + unsafe { + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x2()), + src.as_i64x2(), + )) + } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/avxneconvert.rs b/crates/core_arch/src/x86/avxneconvert.rs index 4520529934..cae48509ea 100644 --- a/crates/core_arch/src/x86/avxneconvert.rs +++ b/crates/core_arch/src/x86/avxneconvert.rs @@ -199,15 +199,17 @@ pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 { assert_instr(vcvtneps2bf16) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh { - let mut dst: __m128bh; - asm!( - "{{vex}}vcvtneps2bf16 {dst},{src}", - dst = lateout(xmm_reg) dst, - src = in(xmm_reg) a, - options(pure, nomem, nostack, preserves_flags) - ); - dst +pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "{{vex}}vcvtneps2bf16 {dst},{src}", + dst = lateout(xmm_reg) dst, + src = in(xmm_reg) a, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } } /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point @@ -221,15 +223,17 @@ pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh { assert_instr(vcvtneps2bf16) )] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh { - let mut dst: __m128bh; - asm!( - "{{vex}}vcvtneps2bf16 {dst},{src}", - dst = lateout(xmm_reg) dst, - src = in(ymm_reg) a, - options(pure, nomem, nostack, preserves_flags) - ); - dst +pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh { + unsafe { + let mut dst: __m128bh; + asm!( + "{{vex}}vcvtneps2bf16 {dst},{src}", + dst = lateout(xmm_reg) dst, + src = in(ymm_reg) a, + options(pure, nomem, nostack, preserves_flags) + ); + dst + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/bmi1.rs b/crates/core_arch/src/x86/bmi1.rs index b8eca101de..eb7242944a 100644 --- a/crates/core_arch/src/x86/bmi1.rs +++ b/crates/core_arch/src/x86/bmi1.rs @@ -20,7 +20,7 @@ use stdarch_test::assert_instr; #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { +pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32)) } @@ -35,8 +35,8 @@ pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 { - x86_bmi_bextr_32(a, control) +pub fn _bextr2_u32(a: u32, control: u32) -> u32 { + unsafe { x86_bmi_bextr_32(a, control) } } /// Bitwise logical `AND` of inverted `a` with `b`. 
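For the BMI1 hunks here, a scalar model of what BEXTR extracts and of the control word that `_bextr_u32` packs before delegating to `_bextr2_u32`, plus the one-line ANDN identity from the next hunk. All names and values below are illustrative and not part of the patch.

```rust
// BEXTR model (illustration only): extract `len` bits of `a` starting at `start`.
fn bextr_model(a: u32, start: u32, len: u32) -> u32 {
    if start >= 32 || len == 0 {
        0
    } else if len >= 32 {
        a >> start
    } else {
        (a >> start) & ((1u32 << len) - 1)
    }
}

// ANDN model: bitwise AND of inverted `a` with `b`, mirroring `_andn_u32` below.
fn andn_model(a: u32, b: u32) -> u32 {
    !a & b
}

fn main() {
    let (start, len) = (4u32, 8u32);
    // Control-word layout built by `_bextr_u32`: start in bits 7:0, len in bits 15:8.
    assert_eq!((start & 0xff) | ((len & 0xff) << 8), 0x0804);
    assert_eq!(bextr_model(0xABCD_1234, start, len), 0x23);
    assert_eq!(andn_model(0b1100, 0b1010), 0b0010);
}
```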
@@ -46,7 +46,7 @@ pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(andn))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 { +pub fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } @@ -57,7 +57,7 @@ pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsi))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsi_u32(x: u32) -> u32 { +pub fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } @@ -68,7 +68,7 @@ pub unsafe fn _blsi_u32(x: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsmsk_u32(x: u32) -> u32 { +pub fn _blsmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_sub(1_u32)) } @@ -81,7 +81,7 @@ pub unsafe fn _blsmsk_u32(x: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsr))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsr_u32(x: u32) -> u32 { +pub fn _blsr_u32(x: u32) -> u32 { x & (x.wrapping_sub(1)) } @@ -94,7 +94,7 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86_updates", since = "1.82.0")] -pub unsafe fn _tzcnt_u16(x: u16) -> u16 { +pub fn _tzcnt_u16(x: u16) -> u16 { x.trailing_zeros() as u16 } @@ -107,7 +107,7 @@ pub unsafe fn _tzcnt_u16(x: u16) -> u16 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _tzcnt_u32(x: u32) -> u32 { +pub fn _tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } @@ -120,7 +120,7 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 { +pub fn _mm_tzcnt_32(x: u32) -> i32 { x.trailing_zeros() as i32 } diff --git a/crates/core_arch/src/x86/bmi2.rs b/crates/core_arch/src/x86/bmi2.rs index 2f7b31c43b..83cf650923 100644 --- a/crates/core_arch/src/x86/bmi2.rs +++ b/crates/core_arch/src/x86/bmi2.rs @@ -25,7 +25,7 @@ use stdarch_test::assert_instr; #[cfg_attr(all(test, target_arch = "x86"), assert_instr(mul))] #[target_feature(enable = "bmi2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { +pub fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { let result: u64 = (a as u64) * (b as u64); *hi = (result >> 32) as u32; result as u32 @@ -38,8 +38,8 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(bzhi))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 { - x86_bmi2_bzhi_32(a, index) +pub fn _bzhi_u32(a: u32, index: u32) -> u32 { + unsafe { x86_bmi2_bzhi_32(a, index) } } /// Scatter contiguous low order bits of `a` to the result at the positions @@ -50,8 +50,8 @@ pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 { #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pdep))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 { - x86_bmi2_pdep_32(a, mask) +pub fn _pdep_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pdep_32(a, mask) } } /// Gathers the bits of `x` specified by the `mask` into the contiguous low @@ -62,8 
+62,8 @@ pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 { #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pext))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 { - x86_bmi2_pext_32(a, mask) +pub fn _pext_u32(a: u32, mask: u32) -> u32 { + unsafe { x86_bmi2_pext_32(a, mask) } } unsafe extern "C" { diff --git a/crates/core_arch/src/x86/f16c.rs b/crates/core_arch/src/x86/f16c.rs index ff3a544117..7686b317d4 100644 --- a/crates/core_arch/src/x86/f16c.rs +++ b/crates/core_arch/src/x86/f16c.rs @@ -28,8 +28,8 @@ unsafe extern "unadjusted" { #[target_feature(enable = "f16c")] #[cfg_attr(test, assert_instr("vcvtph2ps"))] #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] -pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { - transmute(llvm_vcvtph2ps_128(transmute(a))) +pub fn _mm_cvtph_ps(a: __m128i) -> __m128 { + unsafe { transmute(llvm_vcvtph2ps_128(transmute(a))) } } /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector @@ -40,8 +40,8 @@ pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { #[target_feature(enable = "f16c")] #[cfg_attr(test, assert_instr("vcvtph2ps"))] #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] -pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { - transmute(llvm_vcvtph2ps_256(transmute(a))) +pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 { + unsafe { transmute(llvm_vcvtph2ps_256(transmute(a))) } } /// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x @@ -62,11 +62,13 @@ pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { #[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] -pub unsafe fn _mm_cvtps_ph(a: __m128) -> __m128i { +pub fn _mm_cvtps_ph(a: __m128) -> __m128i { static_assert_uimm_bits!(IMM_ROUNDING, 3); - let a = a.as_f32x4(); - let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING); - transmute(r) + unsafe { + let a = a.as_f32x4(); + let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING); + transmute(r) + } } /// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x @@ -86,11 +88,13 @@ pub unsafe fn _mm_cvtps_ph(a: __m128) -> __m128i { #[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")] -pub unsafe fn _mm256_cvtps_ph(a: __m256) -> __m128i { +pub fn _mm256_cvtps_ph(a: __m256) -> __m128i { static_assert_uimm_bits!(IMM_ROUNDING, 3); - let a = a.as_f32x8(); - let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING); - transmute(r) + unsafe { + let a = a.as_f32x8(); + let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING); + transmute(r) + } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/fma.rs b/crates/core_arch/src/x86/fma.rs index 7e5b93c839..d3988422b9 100644 --- a/crates/core_arch/src/x86/fma.rs +++ b/crates/core_arch/src/x86/fma.rs @@ -33,8 +33,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(a, b, c) +pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(a, b, c) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -45,8 +45,8 @@ pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] 
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(a, b, c) +pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(a, b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -57,8 +57,8 @@ pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(a, b, c) +pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(a, b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -69,8 +69,8 @@ pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(a, b, c) +pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(a, b, c) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -83,12 +83,14 @@ pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) - ) +pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -101,12 +103,14 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c)) - ) +pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), _mm_cvtss_f32(c)) + ) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -118,10 +122,12 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [2, 1]) +pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [2, 1]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -133,10 +139,12 @@ pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmaddsub_pd(a: 
__m256d, b: __m256d, c: __m256d) -> __m256d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -148,10 +156,12 @@ pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -163,10 +173,12 @@ pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) +pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -177,8 +189,8 @@ pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(a, b, simd_neg(c)) +pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -189,8 +201,8 @@ pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(a, b, simd_neg(c)) +pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -201,8 +213,8 @@ pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub213ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(a, b, simd_neg(c)) +pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -213,8 +225,8 @@ pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] 
#[cfg_attr(test, assert_instr(vfmsub213ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(a, b, simd_neg(c)) +pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(a, b, simd_neg(c)) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -227,12 +239,14 @@ pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) - ) +pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), _mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -245,12 +259,14 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c)) - ) +pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), _mm_cvtss_f32(b), -_mm_cvtss_f32(c)) + ) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -262,10 +278,12 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 3]) +pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 3]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -277,10 +295,12 @@ pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 5, 2, 7]) +pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 5, 2, 7]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -292,10 +312,12 @@ pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 5, 2, 7]) +pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> 
__m128 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 5, 2, 7]) + } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -307,10 +329,12 @@ pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - let add = simd_fma(a, b, c); - let sub = simd_fma(a, b, simd_neg(c)); - simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) +pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { + let add = simd_fma(a, b, c); + let sub = simd_fma(a, b, simd_neg(c)); + simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15]) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -321,8 +345,8 @@ pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(simd_neg(a), b, c) +pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -333,8 +357,8 @@ pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(simd_neg(a), b, c) +pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -345,8 +369,8 @@ pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(simd_neg(a), b, c) +pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -357,8 +381,8 @@ pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(simd_neg(a), b, c) +pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(simd_neg(a), b, c) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -371,12 +395,14 @@ pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) - ) +pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + 
fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), _mm_cvtsd_f64(c)) + ) + } } /// Multiplies the lower single-precision (32-bit) floating-point elements in @@ -389,12 +415,14 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c)) - ) +pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), _mm_cvtss_f32(c)) + ) + } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -406,8 +434,8 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -419,8 +447,8 @@ pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -432,8 +460,8 @@ pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` @@ -445,8 +473,8 @@ pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { - simd_fma(simd_neg(a), b, simd_neg(c)) +pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { + unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) } } /// Multiplies the lower double-precision (64-bit) floating-point elements in @@ -460,12 +488,14 @@ pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { - simd_insert!( - a, - 0, - fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) - ) +pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { + unsafe { + simd_insert!( + a, + 0, + fmaf64(_mm_cvtsd_f64(a), -_mm_cvtsd_f64(b), -_mm_cvtsd_f64(c)) + ) + } } /// Multiplies 
the lower single-precision (32-bit) floating-point elements in @@ -479,12 +509,14 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { - simd_insert!( - a, - 0, - fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c)) - ) +pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { + unsafe { + simd_insert!( + a, + 0, + fmaf32(_mm_cvtss_f32(a), -_mm_cvtss_f32(b), -_mm_cvtss_f32(c)) + ) + } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index e97fd6143a..9480c23ba1 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -130,13 +130,13 @@ types! { /// use std::arch::x86_64::*; /// /// # fn main() { - /// # #[target_feature(enable = "sse")] + /// # #[target_feature(enable = "sse2")] /// # unsafe fn foo() { unsafe { /// let two_zeros = _mm_setzero_pd(); /// let two_ones = _mm_set1_pd(1.0); /// let two_floats = _mm_set_pd(1.0, 2.0); /// # }} - /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } + /// # if is_x86_feature_detected!("sse2") { unsafe { foo() } } /// # } /// ``` pub struct __m128d(2 x f64); diff --git a/crates/core_arch/src/x86/pclmulqdq.rs b/crates/core_arch/src/x86/pclmulqdq.rs index e346464fb3..cce6a51e2c 100644 --- a/crates/core_arch/src/x86/pclmulqdq.rs +++ b/crates/core_arch/src/x86/pclmulqdq.rs @@ -28,9 +28,9 @@ unsafe extern "C" { #[cfg_attr(test, assert_instr(pclmul, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_clmulepi64_si128(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - pclmulqdq(a, b, IMM8 as u8) + unsafe { pclmulqdq(a, b, IMM8 as u8) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/sha.rs b/crates/core_arch/src/x86/sha.rs index 13aee75c00..2bf517869b 100644 --- a/crates/core_arch/src/x86/sha.rs +++ b/crates/core_arch/src/x86/sha.rs @@ -30,8 +30,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1msg1))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) } } /// Performs the final calculation for the next four SHA1 message values @@ -43,8 +43,8 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1msg2))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) } } /// Calculate SHA1 state variable E after four rounds of operation from the @@ -56,8 +56,8 @@ pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1nexte))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) 
+pub fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) } } /// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) @@ -72,9 +72,9 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(sha1rnds4, FUNC = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(FUNC, 2); - transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) + unsafe { transmute(sha1rnds4(a.as_i32x4(), b.as_i32x4(), FUNC as i8)) } } /// Performs an intermediate calculation for the next four SHA256 message values @@ -86,8 +86,8 @@ pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i) -> __ #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256msg1))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) } } /// Performs the final calculation for the next four SHA256 message values @@ -99,8 +99,8 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256msg2))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) } } /// Performs 2 rounds of SHA256 operation using an initial SHA256 state @@ -114,8 +114,8 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256rnds2))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i { - transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) +pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i { + unsafe { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 6753caa21b..0286a15811 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -18,8 +18,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(addss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 { - simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) +pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) } } /// Adds packed single-precision (32-bit) floating-point elements in `a` and @@ -30,8 +30,8 @@ pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(addps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 { - simd_add(a, b) +pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_add(a, b) } } /// Subtracts the first component of `b` from `a`, 
the other components are @@ -42,8 +42,8 @@ pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(subss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 { - simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) +pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) } } /// Subtracts packed single-precision (32-bit) floating-point elements in `a` and @@ -54,8 +54,8 @@ pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(subps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 { - simd_sub(a, b) +pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_sub(a, b) } } /// Multiplies the first component of `a` and `b`, the other components are @@ -66,8 +66,8 @@ pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(mulss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 { - simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) +pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) } } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` and @@ -78,8 +78,8 @@ pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(mulps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 { - simd_mul(a, b) +pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_mul(a, b) } } /// Divides the first component of `b` by `a`, the other components are @@ -90,8 +90,8 @@ pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(divss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 { - simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) +pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) } } /// Divides packed single-precision (32-bit) floating-point elements in `a` and @@ -102,8 +102,8 @@ pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(divps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { - simd_div(a, b) +pub fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_div(a, b) } } /// Returns the square root of the first single-precision (32-bit) @@ -114,8 +114,8 @@ pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sqrtss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 { - simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) +pub fn _mm_sqrt_ss(a: __m128) -> __m128 { + unsafe { simd_insert!(a, 0, sqrtf32(_mm_cvtss_f32(a))) } } /// Returns the square root of packed single-precision (32-bit) floating-point @@ -126,8 +126,8 @@ pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sqrtps))] #[stable(feature = "simd_x86", 
since = "1.27.0")] -pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 { - simd_fsqrt(a) +pub fn _mm_sqrt_ps(a: __m128) -> __m128 { + unsafe { simd_fsqrt(a) } } /// Returns the approximate reciprocal of the first single-precision @@ -138,8 +138,8 @@ pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rcpss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 { - rcpss(a) +pub fn _mm_rcp_ss(a: __m128) -> __m128 { + unsafe { rcpss(a) } } /// Returns the approximate reciprocal of packed single-precision (32-bit) @@ -150,8 +150,8 @@ pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rcpps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 { - rcpps(a) +pub fn _mm_rcp_ps(a: __m128) -> __m128 { + unsafe { rcpps(a) } } /// Returns the approximate reciprocal square root of the first single-precision @@ -162,8 +162,8 @@ pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rsqrtss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 { - rsqrtss(a) +pub fn _mm_rsqrt_ss(a: __m128) -> __m128 { + unsafe { rsqrtss(a) } } /// Returns the approximate reciprocal square root of packed single-precision @@ -174,8 +174,8 @@ pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rsqrtps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { - rsqrtps(a) +pub fn _mm_rsqrt_ps(a: __m128) -> __m128 { + unsafe { rsqrtps(a) } } /// Compares the first single-precision (32-bit) floating-point element of `a` @@ -187,8 +187,8 @@ pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(minss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { - minss(a, b) +pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { + unsafe { minss(a, b) } } /// Compares packed single-precision (32-bit) floating-point elements in `a` and @@ -199,9 +199,9 @@ pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(minps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmin`. 
- minps(a, b) + unsafe { minps(a, b) } } /// Compares the first single-precision (32-bit) floating-point element of `a` @@ -213,8 +213,8 @@ pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(maxss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { - maxss(a, b) +pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { + unsafe { maxss(a, b) } } /// Compares packed single-precision (32-bit) floating-point elements in `a` and @@ -225,9 +225,9 @@ pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(maxps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmax`. - maxps(a, b) + unsafe { maxps(a, b) } } /// Bitwise AND of packed single-precision (32-bit) floating-point elements. @@ -241,10 +241,12 @@ pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { assert_instr(andps) )] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { - let a: __m128i = mem::transmute(a); - let b: __m128i = mem::transmute(b); - mem::transmute(simd_and(a, b)) +pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(simd_and(a, b)) + } } /// Bitwise AND-NOT of packed single-precision (32-bit) floating-point @@ -262,11 +264,13 @@ pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { assert_instr(andnps) )] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { - let a: __m128i = mem::transmute(a); - let b: __m128i = mem::transmute(b); - let mask: __m128i = mem::transmute(i32x4::splat(-1)); - mem::transmute(simd_and(simd_xor(mask, a), b)) +pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + let mask: __m128i = mem::transmute(i32x4::splat(-1)); + mem::transmute(simd_and(simd_xor(mask, a), b)) + } } /// Bitwise OR of packed single-precision (32-bit) floating-point elements. @@ -280,10 +284,12 @@ pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { assert_instr(orps) )] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { - let a: __m128i = mem::transmute(a); - let b: __m128i = mem::transmute(b); - mem::transmute(simd_or(a, b)) +pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(simd_or(a, b)) + } } /// Bitwise exclusive OR of packed single-precision (32-bit) floating-point @@ -298,10 +304,12 @@ pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { assert_instr(xorps) )] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { - let a: __m128i = mem::transmute(a); - let b: __m128i = mem::transmute(b); - mem::transmute(simd_xor(a, b)) +pub fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a: __m128i = mem::transmute(a); + let b: __m128i = mem::transmute(b); + mem::transmute(simd_xor(a, b)) + } } /// Compares the lowest `f32` of both inputs for equality. 
The lowest 32 bits of @@ -313,8 +321,8 @@ pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpeqss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 0) +pub fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 0) } } /// Compares the lowest `f32` of both inputs for less than. The lowest 32 bits @@ -327,8 +335,8 @@ pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 1) +pub fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 1) } } /// Compares the lowest `f32` of both inputs for less than or equal. The lowest @@ -341,8 +349,8 @@ pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpless))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 2) +pub fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 2) } } /// Compares the lowest `f32` of both inputs for greater than. The lowest 32 @@ -355,8 +363,8 @@ pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) +pub fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 1), [4, 1, 2, 3]) } } /// Compares the lowest `f32` of both inputs for greater than or equal. The @@ -369,8 +377,8 @@ pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpless))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) +pub fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 2), [4, 1, 2, 3]) } } /// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits @@ -383,8 +391,8 @@ pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpneqss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 4) +pub fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 4) } } /// Compares the lowest `f32` of both inputs for not-less-than. The lowest 32 @@ -397,8 +405,8 @@ pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 5) +pub fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 5) } } /// Compares the lowest `f32` of both inputs for not-less-than-or-equal. 
The @@ -411,8 +419,8 @@ pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnless))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 6) +pub fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 6) } } /// Compares the lowest `f32` of both inputs for not-greater-than. The lowest 32 @@ -425,8 +433,8 @@ pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) +pub fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 5), [4, 1, 2, 3]) } } /// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. The @@ -439,8 +447,8 @@ pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnless))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) +pub fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, cmpss(b, a, 6), [4, 1, 2, 3]) } } /// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of @@ -453,8 +461,8 @@ pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpordss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 7) +pub fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 7) } } /// Checks if the lowest `f32` of both inputs are unordered. The lowest 32 bits @@ -467,8 +475,8 @@ pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpunordss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { - cmpss(a, b, 3) +pub fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { + unsafe { cmpss(a, b, 3) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -480,8 +488,8 @@ pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpeqps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { - cmpps(a, b, 0) +pub fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 0) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -493,8 +501,8 @@ pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { - cmpps(a, b, 1) +pub fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 1) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. 
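A quick sketch of what these signature changes buy callers, assuming a toolchain where safe `#[target_feature]` functions are stable and the rest of the module is converted the same way this patch does; `sum4` and `caller` are illustrative names only, not part of the crate:

```rust
use std::arch::x86_64::*;

// The intrinsic call itself no longer needs an `unsafe` block: the enclosing
// function already enables the `sse` feature that `_mm_add_ps` requires.
#[target_feature(enable = "sse")]
fn sum4(a: __m128, b: __m128) -> __m128 {
    _mm_add_ps(a, b)
}

// On hosted x86_64 targets SSE is part of the baseline feature set, so even an
// un-annotated caller can call `sum4` safely. Where `sse` is not statically
// enabled, the call is still `unsafe` and is normally guarded with
// `is_x86_feature_detected!("sse")`.
fn caller(a: __m128, b: __m128) -> __m128 {
    sum4(a, b)
}
```

The sketches further down assume the same setup and only show the `#[target_feature]` helper.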
@@ -507,8 +515,8 @@ pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpleps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { - cmpps(a, b, 2) +pub fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 2) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -520,8 +528,8 @@ pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { - cmpps(b, a, 1) +pub fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 1) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -534,8 +542,8 @@ pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpleps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { - cmpps(b, a, 2) +pub fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 2) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -547,8 +555,8 @@ pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpneqps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { - cmpps(a, b, 4) +pub fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 4) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -561,8 +569,8 @@ pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { - cmpps(a, b, 5) +pub fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 5) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -575,8 +583,8 @@ pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnleps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { - cmpps(a, b, 6) +pub fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(a, b, 6) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -589,8 +597,8 @@ pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { - cmpps(b, a, 5) +pub fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 5) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. 
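The packed comparisons above produce per-lane masks (all ones or all zeros), not booleans; a hypothetical helper showing how a caller typically collapses them with `_mm_movemask_ps` (converted later in this same file), under the same assumptions as the sketch above:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse")]
fn demo_compare_mask() {
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let b = _mm_setr_ps(4.0, 3.0, 2.0, 1.0);
    // Lanes 0 and 1 satisfy `a < b`; the comparison sets exactly those lanes to all ones.
    let lt = _mm_cmplt_ps(a, b);
    // `_mm_movemask_ps` gathers the four sign bits into the low bits of an i32.
    assert_eq!(_mm_movemask_ps(lt), 0b0011);
}
```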
@@ -603,8 +611,8 @@ pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnleps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { - cmpps(b, a, 6) +pub fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 6) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -617,8 +625,8 @@ pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpordps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { - cmpps(b, a, 7) +pub fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 7) } } /// Compares each of the four floats in `a` to the corresponding element in `b`. @@ -631,8 +639,8 @@ pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpunordps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { - cmpps(b, a, 3) +pub fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { + unsafe { cmpps(b, a, 3) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -643,8 +651,8 @@ pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { - comieq_ss(a, b) +pub fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { + unsafe { comieq_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -655,8 +663,8 @@ pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { - comilt_ss(a, b) +pub fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { + unsafe { comilt_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -668,8 +676,8 @@ pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { - comile_ss(a, b) +pub fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { + unsafe { comile_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -681,8 +689,8 @@ pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { - comigt_ss(a, b) +pub fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { + unsafe { comigt_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. 
Returns @@ -694,8 +702,8 @@ pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { - comige_ss(a, b) +pub fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { + unsafe { comige_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -706,8 +714,8 @@ pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { - comineq_ss(a, b) +pub fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { + unsafe { comineq_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -719,8 +727,8 @@ pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { - ucomieq_ss(a, b) +pub fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomieq_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -733,8 +741,8 @@ pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { - ucomilt_ss(a, b) +pub fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomilt_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -747,8 +755,8 @@ pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { - ucomile_ss(a, b) +pub fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomile_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -761,8 +769,8 @@ pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { - ucomigt_ss(a, b) +pub fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomigt_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns @@ -775,8 +783,8 @@ pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { - ucomige_ss(a, b) +pub fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomige_ss(a, b) } } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. 
Returns @@ -788,8 +796,8 @@ pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { - ucomineq_ss(a, b) +pub fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { + unsafe { ucomineq_ss(a, b) } } /// Converts the lowest 32 bit float in the input vector to a 32 bit integer. @@ -805,8 +813,8 @@ pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 { - cvtss2si(a) +pub fn _mm_cvtss_si32(a: __m128) -> i32 { + unsafe { cvtss2si(a) } } /// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html). @@ -816,7 +824,7 @@ pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { +pub fn _mm_cvt_ss2si(a: __m128) -> i32 { _mm_cvtss_si32(a) } @@ -835,8 +843,8 @@ pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 { - cvttss2si(a) +pub fn _mm_cvttss_si32(a: __m128) -> i32 { + unsafe { cvttss2si(a) } } /// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html). @@ -846,7 +854,7 @@ pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { +pub fn _mm_cvtt_ss2si(a: __m128) -> i32 { _mm_cvttss_si32(a) } @@ -858,8 +866,8 @@ pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { // No point in using assert_instrs. In Unix x86_64 calling convention this is a // no-op, and on msvc it's just a `mov`. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { - simd_extract!(a, 0) +pub fn _mm_cvtss_f32(a: __m128) -> f32 { + unsafe { simd_extract!(a, 0) } } /// Converts a 32 bit integer to a 32 bit float. The result vector is the input @@ -873,8 +881,8 @@ pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 { - cvtsi2ss(a, b) +pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 { + unsafe { cvtsi2ss(a, b) } } /// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html). 
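Worth spelling out the difference between the two conversion families in this stretch: `_mm_cvtss_si32` rounds according to MXCSR (round-to-nearest-even by default), while `_mm_cvttss_si32` always truncates toward zero. A small sketch, assuming the default rounding mode and the same safe-`#[target_feature]` setup as above:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse")]
fn demo_convert() {
    let x = _mm_set_ss(2.7);
    assert_eq!(_mm_cvtss_si32(x), 3); // rounds to nearest under the default MXCSR state
    assert_eq!(_mm_cvttss_si32(x), 2); // always truncates toward zero
}
```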
@@ -884,7 +892,7 @@ pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 { +pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 { _mm_cvtsi32_ss(a, b) } @@ -896,7 +904,7 @@ pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_ss(a: f32) -> __m128 { +pub fn _mm_set_ss(a: f32) -> __m128 { __m128([a, 0.0, 0.0, 0.0]) } @@ -907,7 +915,7 @@ pub unsafe fn _mm_set_ss(a: f32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set1_ps(a: f32) -> __m128 { +pub fn _mm_set1_ps(a: f32) -> __m128 { __m128([a, a, a, a]) } @@ -918,7 +926,7 @@ pub unsafe fn _mm_set1_ps(a: f32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_ps1(a: f32) -> __m128 { +pub fn _mm_set_ps1(a: f32) -> __m128 { _mm_set1_ps(a) } @@ -945,7 +953,7 @@ pub unsafe fn _mm_set_ps1(a: f32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { +pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { __m128([d, c, b, a]) } @@ -971,7 +979,7 @@ pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { assert_instr(movaps) )] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { +pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { __m128([a, b, c, d]) } @@ -982,8 +990,8 @@ pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setzero_ps() -> __m128 { - const { mem::zeroed() } +pub fn _mm_setzero_ps() -> __m128 { + const { unsafe { mem::zeroed() } } } /// A utility function for creating masks to use with Intel shuffle and @@ -1013,18 +1021,20 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { #[cfg_attr(test, assert_instr(shufps, MASK = 3))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(MASK, 8); - simd_shuffle!( - a, - b, - [ - MASK as u32 & 0b11, - (MASK as u32 >> 2) & 0b11, - ((MASK as u32 >> 4) & 0b11) + 4, - ((MASK as u32 >> 6) & 0b11) + 4, - ], - ) + unsafe { + simd_shuffle!( + a, + b, + [ + MASK as u32 & 0b11, + (MASK as u32 >> 2) & 0b11, + ((MASK as u32 >> 4) & 0b11) + 4, + ((MASK as u32 >> 6) & 0b11) + 4, + ], + ) + } } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1035,8 +1045,8 @@ pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, b, [2, 6, 3, 7]) +pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [2, 6, 
3, 7]) } } /// Unpacks and interleave single-precision (32-bit) floating-point elements @@ -1047,8 +1057,8 @@ pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, b, [0, 4, 1, 5]) +pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) } } /// Combine higher half of `a` and `b`. The higher half of `b` occupies the @@ -1059,9 +1069,9 @@ pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhlps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { // TODO; figure why this is a different instruction on msvc? - simd_shuffle!(a, b, [6, 7, 2, 3]) + unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) } } /// Combine lower half of `a` and `b`. The lower half of `b` occupies the @@ -1072,8 +1082,8 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, b, [0, 1, 4, 5]) +pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [0, 1, 4, 5]) } } /// Returns a mask of the most significant bit of each element in `a`. @@ -1086,11 +1096,13 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { +pub fn _mm_movemask_ps(a: __m128) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. - let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO); - simd_bitmask::(mask).into() + unsafe { + let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO); + simd_bitmask::(mask).into() + } } /// Construct a `__m128` with the lowest element read from `p` and the other @@ -1338,8 +1350,8 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { - simd_shuffle!(a, b, [4, 1, 2, 3]) +pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { + unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) } } /// Performs a serializing operation on all non-temporal ("streaming") store instructions that @@ -1440,9 +1452,11 @@ pub unsafe fn _mm_sfence() { note = "see `_mm_getcsr` documentation - use inline assembly instead" )] pub unsafe fn _mm_getcsr() -> u32 { - let mut result = 0_i32; - stmxcsr(ptr::addr_of_mut!(result) as *mut i8); - result as u32 + unsafe { + let mut result = 0_i32; + stmxcsr(ptr::addr_of_mut!(result) as *mut i8); + result as u32 + } } /// Sets the MXCSR register with the 32-bit unsigned integer value. 
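The unpack/move intrinsics above are easiest to read from their `simd_shuffle!` index lists: `[0, 4, 1, 5]` for `_mm_unpacklo_ps` and `[0, 1, 4, 5]` for `_mm_movelh_ps`. A hypothetical check of those lane orderings, under the same assumptions as the earlier sketch:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse")]
fn demo_interleave() {
    let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
    let b = _mm_setr_ps(4.0, 5.0, 6.0, 7.0);

    let lo = _mm_unpacklo_ps(a, b); // [a0, b0, a1, b1] = [0.0, 4.0, 1.0, 5.0]
    let cat = _mm_movelh_ps(a, b);  // [a0, a1, b0, b1] = [0.0, 1.0, 4.0, 5.0]

    assert_eq!(_mm_movemask_ps(_mm_cmpeq_ps(lo, _mm_setr_ps(0.0, 4.0, 1.0, 5.0))), 0b1111);
    assert_eq!(_mm_movemask_ps(_mm_cmpeq_ps(cat, _mm_setr_ps(0.0, 1.0, 4.0, 5.0))), 0b1111);
}
```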
@@ -1878,8 +1892,8 @@ pub unsafe fn _mm_prefetch(p: *const i8) { #[inline] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_undefined_ps() -> __m128 { - const { mem::zeroed() } +pub fn _mm_undefined_ps() -> __m128 { + const { unsafe { mem::zeroed() } } } /// Transpose the 4x4 matrix formed by 4 rows of __m128 in place. @@ -1889,7 +1903,7 @@ pub unsafe fn _mm_undefined_ps() -> __m128 { #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _MM_TRANSPOSE4_PS( +pub fn _MM_TRANSPOSE4_PS( row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 08b3712ea8..6812632259 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -76,8 +76,8 @@ pub unsafe fn _mm_mfence() { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_add(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } } /// Adds packed 16-bit integers in `a` and `b`. @@ -87,8 +87,8 @@ pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_add(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } } /// Adds packed 32-bit integers in `a` and `b`. @@ -98,8 +98,8 @@ pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_add(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } } /// Adds packed 64-bit integers in `a` and `b`. @@ -109,8 +109,8 @@ pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_add(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } } /// Adds packed 8-bit integers in `a` and `b` using saturation. @@ -120,8 +120,8 @@ pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) } } /// Adds packed 16-bit integers in `a` and `b` using saturation. 
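With the sse2.rs hunks starting here, a hypothetical helper contrasting the wrapping and saturating adds just shown; `_mm_set1_epi8`, `_mm_cmpeq_epi8` and `_mm_movemask_epi8` come from the same module and are assumed to be converted by this patch as well:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse2")]
fn demo_saturating_add() {
    let a = _mm_set1_epi8(100);
    let b = _mm_set1_epi8(100);

    let wrapped = _mm_add_epi8(a, b);    // 100 + 100 wraps to -56 in every i8 lane
    let saturated = _mm_adds_epi8(a, b); // clamped to i8::MAX = 127 in every lane

    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(wrapped, _mm_set1_epi8(-56))), 0xFFFF);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(saturated, _mm_set1_epi8(127))), 0xFFFF);
}
```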
@@ -131,8 +131,8 @@ pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) } } /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. @@ -142,8 +142,8 @@ pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddusb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) } } /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. @@ -153,8 +153,8 @@ pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddusw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) } } /// Averages packed unsigned 8-bit integers in `a` and `b`. @@ -164,11 +164,13 @@ pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pavgb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { - let a = simd_cast::<_, u16x16>(a.as_u8x16()); - let b = simd_cast::<_, u16x16>(b.as_u8x16()); - let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1)); - transmute(simd_cast::<_, u8x16>(r)) +pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, u16x16>(a.as_u8x16()); + let b = simd_cast::<_, u16x16>(b.as_u8x16()); + let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1)); + transmute(simd_cast::<_, u8x16>(r)) + } } /// Averages packed unsigned 16-bit integers in `a` and `b`. @@ -178,11 +180,13 @@ pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pavgw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { - let a = simd_cast::<_, u32x8>(a.as_u16x8()); - let b = simd_cast::<_, u32x8>(b.as_u16x8()); - let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1)); - transmute(simd_cast::<_, u16x8>(r)) +pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, u32x8>(a.as_u16x8()); + let b = simd_cast::<_, u32x8>(b.as_u16x8()); + let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1)); + transmute(simd_cast::<_, u16x8>(r)) + } } /// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`. 
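The bodies of `_mm_avg_epu8`/`_mm_avg_epu16` above make the rounding explicit: each lane computes `(a + b + 1) >> 1` in a wider type, so ties round up. A hypothetical check of that behaviour, same assumptions as before:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse2")]
fn demo_average() {
    let a = _mm_set1_epi8(10);
    let b = _mm_set1_epi8(11);
    let avg = _mm_avg_epu8(a, b); // (10 + 11 + 1) >> 1 = 11 in every lane

    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(avg, _mm_set1_epi8(11))), 0xFFFF);
}
```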
@@ -196,8 +200,8 @@ pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaddwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed @@ -208,10 +212,12 @@ pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaxsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i16x8(); - let b = b.as_i16x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the @@ -222,10 +228,12 @@ pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaxub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u8x16(); - let b = b.as_u8x16(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let b = b.as_u8x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed @@ -236,10 +244,12 @@ pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pminsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i16x8(); - let b = b.as_i16x8(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let b = b.as_i16x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the @@ -250,10 +260,12 @@ pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pminub))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u8x16(); - let b = b.as_u8x16(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let b = b.as_u8x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Multiplies the packed 16-bit integers in `a` and `b`. 
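The `_mm_madd_epi16` doc above is terse: the intrinsic multiplies the 16-bit lanes pairwise and then adds each adjacent pair of 32-bit products, yielding four 32-bit sums. A hypothetical worked example, same assumptions as before:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse2")]
fn demo_madd() {
    // `_mm_set_epi16` lists lanes from the highest (e7) down to the lowest (e0).
    let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
    let b = _mm_set1_epi16(10);

    // dst[i] = a[2i] * b[2i] + a[2i + 1] * b[2i + 1]
    let sums = _mm_madd_epi16(a, b); // [1*10 + 2*10, 3*10 + 4*10, ...] = [30, 70, 110, 150]

    let expected = _mm_set_epi32(150, 110, 70, 30);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(sums, expected)), 0xFFFF);
}
```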
@@ -266,11 +278,13 @@ pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmulhw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { - let a = simd_cast::<_, i32x8>(a.as_i16x8()); - let b = simd_cast::<_, i32x8>(b.as_i16x8()); - let r = simd_shr(simd_mul(a, b), i32x8::splat(16)); - transmute(simd_cast::(r)) +pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, i32x8>(a.as_i16x8()); + let b = simd_cast::<_, i32x8>(b.as_i16x8()); + let r = simd_shr(simd_mul(a, b), i32x8::splat(16)); + transmute(simd_cast::(r)) + } } /// Multiplies the packed unsigned 16-bit integers in `a` and `b`. @@ -283,11 +297,13 @@ pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmulhuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { - let a = simd_cast::<_, u32x8>(a.as_u16x8()); - let b = simd_cast::<_, u32x8>(b.as_u16x8()); - let r = simd_shr(simd_mul(a, b), u32x8::splat(16)); - transmute(simd_cast::(r)) +pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, u32x8>(a.as_u16x8()); + let b = simd_cast::<_, u32x8>(b.as_u16x8()); + let r = simd_shr(simd_mul(a, b), u32x8::splat(16)); + transmute(simd_cast::(r)) + } } /// Multiplies the packed 16-bit integers in `a` and `b`. @@ -300,8 +316,8 @@ pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmullw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } } /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element @@ -314,11 +330,13 @@ pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmuludq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u64x2(); - let b = b.as_u64x2(); - let mask = u64x2::splat(u32::MAX.into()); - transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) +pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u64x2(); + let b = b.as_u64x2(); + let mask = u64x2::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) + } } /// Sum the absolute differences of packed unsigned 8-bit integers. @@ -333,8 +351,8 @@ pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psadbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { - transmute(psadbw(a.as_u8x16(), b.as_u8x16())) +pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } } /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. 
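The `_mm_mulhi_epi16`/`_mm_mullo_epi16` pair above splits each full 32-bit product into its two 16-bit halves, which is easiest to see with a concrete value; a hypothetical check, same assumptions as before:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse2")]
fn demo_mul_halves() {
    let a = _mm_set1_epi16(1000);
    let b = _mm_set1_epi16(1000);

    // 1000 * 1000 = 1_000_000 = 0x000F_4240 per lane.
    let lo = _mm_mullo_epi16(a, b); // keeps 0x4240 = 16960
    let hi = _mm_mulhi_epi16(a, b); // keeps 0x000F = 15

    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(lo, _mm_set1_epi16(0x4240))), 0xFFFF);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(hi, _mm_set1_epi16(15))), 0xFFFF);
}
```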
@@ -344,8 +362,8 @@ pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } } /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. @@ -355,8 +373,8 @@ pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. @@ -366,8 +384,8 @@ pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } } /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`. @@ -377,8 +395,8 @@ pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` @@ -389,8 +407,8 @@ pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) } } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` @@ -401,8 +419,8 @@ pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) } } /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit @@ -413,8 +431,8 @@ pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubusb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) +pub fn 
_mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) } } /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit @@ -425,8 +443,8 @@ pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubusw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) +pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) } } /// Shifts `a` left by `IMM8` bytes while shifting in zeros. @@ -437,9 +455,9 @@ pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_slli_si128(a: __m128i) -> __m128i { +pub fn _mm_slli_si128(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - _mm_slli_si128_impl::(a) + unsafe { _mm_slli_si128_impl::(a) } } /// Implementation detail: converts the immediate argument of the @@ -483,9 +501,11 @@ unsafe fn _mm_slli_si128_impl(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_bslli_si128(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - _mm_slli_si128_impl::(a) +pub fn _mm_bslli_si128(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + _mm_slli_si128_impl::(a) + } } /// Shifts `a` right by `IMM8` bytes while shifting in zeros. @@ -496,9 +516,11 @@ pub unsafe fn _mm_bslli_si128(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_bsrli_si128(a: __m128i) -> __m128i { - static_assert_uimm_bits!(IMM8, 8); - _mm_srli_si128_impl::(a) +pub fn _mm_bsrli_si128(a: __m128i) -> __m128i { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + _mm_srli_si128_impl::(a) + } } /// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros. @@ -509,12 +531,14 @@ pub unsafe fn _mm_bsrli_si128(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psllw, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_slli_epi16(a: __m128i) -> __m128i { +pub fn _mm_slli_epi16(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm_setzero_si128() - } else { - transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))) + unsafe { + if IMM8 >= 16 { + _mm_setzero_si128() + } else { + transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))) + } } } @@ -526,8 +550,8 @@ pub unsafe fn _mm_slli_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { - transmute(psllw(a.as_i16x8(), count.as_i16x8())) +pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) } } /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros. 
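Two easy-to-confuse families sit next to each other here: `_mm_slli_si128` shifts the whole register left by `IMM8` bytes, while `_mm_slli_epi16` shifts each 16-bit lane left by `IMM8` bits and, as the `IMM8 >= 16` branch above shows, returns zero once the count reaches the lane width. A hypothetical check of the per-lane variant, same assumptions as before:

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "sse2")]
fn demo_lane_shift() {
    let ones = _mm_set1_epi16(1);

    let by_three = _mm_slli_epi16(ones, 3); // every lane becomes 1 << 3 = 8
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(by_three, _mm_set1_epi16(8))), 0xFFFF);

    let too_far = _mm_slli_epi16(ones, 16); // count >= lane width: all lanes zeroed
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(too_far, _mm_setzero_si128())), 0xFFFF);
}
```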
@@ -538,12 +562,14 @@ pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pslld, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_slli_epi32(a: __m128i) -> __m128i { +pub fn _mm_slli_epi32(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm_setzero_si128() - } else { - transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32))) + unsafe { + if IMM8 >= 32 { + _mm_setzero_si128() + } else { + transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32))) + } } } @@ -555,8 +581,8 @@ pub unsafe fn _mm_slli_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { - transmute(pslld(a.as_i32x4(), count.as_i32x4())) +pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros. @@ -567,12 +593,14 @@ pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psllq, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_slli_epi64(a: __m128i) -> __m128i { +pub fn _mm_slli_epi64(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm_setzero_si128() - } else { - transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))) + unsafe { + if IMM8 >= 64 { + _mm_setzero_si128() + } else { + transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))) + } } } @@ -584,8 +612,8 @@ pub unsafe fn _mm_slli_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { - transmute(psllq(a.as_i64x2(), count.as_i64x2())) +pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign @@ -597,9 +625,9 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psraw, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srai_epi16(a: __m128i) -> __m128i { +pub fn _mm_srai_epi16(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) + unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) } } /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign @@ -610,8 +638,8 @@ pub unsafe fn _mm_srai_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { - transmute(psraw(a.as_i16x8(), count.as_i16x8())) +pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) } } /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign @@ -623,9 +651,9 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psrad, IMM8 = 1))] 
#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srai_epi32(a: __m128i) -> __m128i { +pub fn _mm_srai_epi32(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) + unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) } } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign @@ -636,8 +664,8 @@ pub unsafe fn _mm_srai_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { - transmute(psrad(a.as_i32x4(), count.as_i32x4())) +pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) } } /// Shifts `a` right by `IMM8` bytes while shifting in zeros. @@ -648,9 +676,9 @@ pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srli_si128(a: __m128i) -> __m128i { +pub fn _mm_srli_si128(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - _mm_srli_si128_impl::(a) + unsafe { _mm_srli_si128_impl::(a) } } /// Implementation detail: converts the immediate argument of the @@ -699,12 +727,14 @@ unsafe fn _mm_srli_si128_impl(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srli_epi16(a: __m128i) -> __m128i { +pub fn _mm_srli_epi16(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 16 { - _mm_setzero_si128() - } else { - transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16))) + unsafe { + if IMM8 >= 16 { + _mm_setzero_si128() + } else { + transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16))) + } } } @@ -716,8 +746,8 @@ pub unsafe fn _mm_srli_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { - transmute(psrlw(a.as_i16x8(), count.as_i16x8())) +pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } } /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in @@ -729,12 +759,14 @@ pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psrld, IMM8 = 8))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srli_epi32(a: __m128i) -> __m128i { +pub fn _mm_srli_epi32(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 32 { - _mm_setzero_si128() - } else { - transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32))) + unsafe { + if IMM8 >= 32 { + _mm_setzero_si128() + } else { + transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32))) + } } } @@ -746,8 +778,8 @@ pub unsafe fn _mm_srli_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { - transmute(psrld(a.as_i32x4(), count.as_i32x4())) +pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { + 
unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) } } /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in @@ -759,12 +791,14 @@ pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srli_epi64(a: __m128i) -> __m128i { +pub fn _mm_srli_epi64(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - if IMM8 >= 64 { - _mm_setzero_si128() - } else { - transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))) + unsafe { + if IMM8 >= 64 { + _mm_setzero_si128() + } else { + transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))) + } } } @@ -776,8 +810,8 @@ pub unsafe fn _mm_srli_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { - transmute(psrlq(a.as_i64x2(), count.as_i64x2())) +pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { + unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } } /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and @@ -788,8 +822,8 @@ pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { - simd_and(a, b) +pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_and(a, b) } } /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and @@ -800,8 +834,8 @@ pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andnps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { - simd_and(simd_xor(_mm_set1_epi8(-1), a), b) +pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } } /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and @@ -812,8 +846,8 @@ pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(orps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { - simd_or(a, b) +pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_or(a, b) } } /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and @@ -824,8 +858,8 @@ pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { - simd_xor(a, b) +pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { + unsafe { simd_xor(a, b) } } /// Compares packed 8-bit integers in `a` and `b` for equality. 
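The integer logic wrappers above combine into the classic branchless select, `(mask & a) | (!mask & b)`. A small sketch, with the helper name invented here:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_select() {
    use std::arch::x86_64::*;
    // Per-lane select: where `mask` is all ones take `a`, elsewhere take `b`.
    let mask = _mm_set_epi32(-1, 0, -1, 0);
    let a = _mm_set1_epi32(111);
    let b = _mm_set1_epi32(222);
    let r = _mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b));
    // Lane 0 had a zero mask, so it comes from `b`.
    assert_eq!(_mm_cvtsi128_si32(r), 222);
    // Logical right shift by an immediate on each 32-bit lane.
    assert_eq!(_mm_cvtsi128_si32(_mm_srli_epi32::<1>(_mm_set1_epi32(4))), 2);
}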
@@ -835,8 +869,8 @@ pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_eq(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_eq(a.as_i8x16(), b.as_i8x16())) } } /// Compares packed 16-bit integers in `a` and `b` for equality. @@ -846,8 +880,8 @@ pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_eq(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_eq(a.as_i16x8(), b.as_i16x8())) } } /// Compares packed 32-bit integers in `a` and `b` for equality. @@ -857,8 +891,8 @@ pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_eq(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_eq(a.as_i32x4(), b.as_i32x4())) } } /// Compares packed 8-bit integers in `a` and `b` for greater-than. @@ -868,8 +902,8 @@ pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_gt(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_gt(a.as_i8x16(), b.as_i8x16())) } } /// Compares packed 16-bit integers in `a` and `b` for greater-than. @@ -879,8 +913,8 @@ pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_gt(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_gt(a.as_i16x8(), b.as_i16x8())) } } /// Compares packed 32-bit integers in `a` and `b` for greater-than. @@ -890,8 +924,8 @@ pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_gt(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_gt(a.as_i32x4(), b.as_i32x4())) } } /// Compares packed 8-bit integers in `a` and `b` for less-than. 
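The equality and greater-than compares above produce all-ones or all-zero lanes, which pair naturally with `_mm_movemask_epi8` (further down in this file) to locate matches. A sketch; the helper name is illustrative only:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn find_byte(haystack: [i8; 16], needle: i8) -> Option<usize> {
    use std::arch::x86_64::*;
    let chunk = _mm_set_epi8(
        haystack[15], haystack[14], haystack[13], haystack[12],
        haystack[11], haystack[10], haystack[9], haystack[8],
        haystack[7], haystack[6], haystack[5], haystack[4],
        haystack[3], haystack[2], haystack[1], haystack[0],
    );
    // Each matching byte becomes 0xFF; movemask collapses that to one bit per lane.
    let hits = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, _mm_set1_epi8(needle)));
    if hits == 0 { None } else { Some(hits.trailing_zeros() as usize) }
}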
@@ -901,8 +935,8 @@ pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_lt(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_lt(a.as_i8x16(), b.as_i8x16())) } } /// Compares packed 16-bit integers in `a` and `b` for less-than. @@ -912,8 +946,8 @@ pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_lt(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_lt(a.as_i16x8(), b.as_i16x8())) } } /// Compares packed 32-bit integers in `a` and `b` for less-than. @@ -923,8 +957,8 @@ pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_lt(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_lt(a.as_i32x4(), b.as_i32x4())) } } /// Converts the lower two packed 32-bit integers in `a` to packed @@ -935,9 +969,11 @@ pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtdq2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { - let a = a.as_i32x4(); - simd_cast::(simd_shuffle!(a, a, [0, 1])) +pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { + unsafe { + let a = a.as_i32x4(); + simd_cast::(simd_shuffle!(a, a, [0, 1])) + } } /// Returns `a` with its lower element replaced by `b` after converting it to @@ -948,8 +984,8 @@ pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { - simd_insert!(a, 0, b as f64) +pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { + unsafe { simd_insert!(a, 0, b as f64) } } /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) @@ -960,8 +996,8 @@ pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtdq2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { - transmute(simd_cast::<_, f32x4>(a.as_i32x4())) +pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { + unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) } } /// Converts packed single-precision (32-bit) floating-point elements in `a` @@ -972,8 +1008,8 @@ pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { - transmute(cvtps2dq(a)) +pub fn _mm_cvtps_epi32(a: __m128) -> __m128i { + unsafe { transmute(cvtps2dq(a)) } } /// Returns a vector whose lowest element is `a` and all higher elements are @@ -983,8 +1019,8 
@@ pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { - transmute(i32x4::new(a, 0, 0, 0)) +pub fn _mm_cvtsi32_si128(a: i32) -> __m128i { + unsafe { transmute(i32x4::new(a, 0, 0, 0)) } } /// Returns the lowest element of `a`. @@ -993,8 +1029,8 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { - simd_extract!(a.as_i32x4(), 0) +pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 { + unsafe { simd_extract!(a.as_i32x4(), 0) } } /// Sets packed 64-bit integers with the supplied values, from highest to @@ -1005,8 +1041,8 @@ pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { - transmute(i64x2::new(e0, e1)) +pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { + unsafe { transmute(i64x2::new(e0, e1)) } } /// Sets packed 32-bit integers with the supplied values. @@ -1016,8 +1052,8 @@ pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { - transmute(i32x4::new(e0, e1, e2, e3)) +pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { + unsafe { transmute(i32x4::new(e0, e1, e2, e3)) } } /// Sets packed 16-bit integers with the supplied values. @@ -1027,7 +1063,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_epi16( +pub fn _mm_set_epi16( e7: i16, e6: i16, e5: i16, @@ -1037,7 +1073,7 @@ pub unsafe fn _mm_set_epi16( e1: i16, e0: i16, ) -> __m128i { - transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) + unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } } /// Sets packed 8-bit integers with the supplied values. @@ -1047,7 +1083,7 @@ pub unsafe fn _mm_set_epi16( #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_epi8( +pub fn _mm_set_epi8( e15: i8, e14: i8, e13: i8, @@ -1065,10 +1101,12 @@ pub unsafe fn _mm_set_epi8( e1: i8, e0: i8, ) -> __m128i { - #[rustfmt::skip] - transmute(i8x16::new( - e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, - )) + unsafe { + #[rustfmt::skip] + transmute(i8x16::new( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + )) + } } /// Broadcasts 64-bit integer `a` to all elements. 
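A small sketch of the int/float conversions and scalar moves above; the function name is invented, and small integers round-trip exactly:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_conversions() {
    use std::arch::x86_64::*;
    // i32 lanes -> f32 lanes and back again.
    let ints = _mm_set_epi32(4, 3, 2, 1);
    let floats = _mm_cvtepi32_ps(ints);
    let roundtrip = _mm_cvtps_epi32(floats);
    assert_eq!(_mm_cvtsi128_si32(roundtrip), 1);
    // Scalar move into and out of the low lane.
    assert_eq!(_mm_cvtsi128_si32(_mm_cvtsi32_si128(42)), 42);
}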
@@ -1078,7 +1116,7 @@ pub unsafe fn _mm_set_epi8( #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { +pub fn _mm_set1_epi64x(a: i64) -> __m128i { _mm_set_epi64x(a, a) } @@ -1089,7 +1127,7 @@ pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { +pub fn _mm_set1_epi32(a: i32) -> __m128i { _mm_set_epi32(a, a, a, a) } @@ -1100,7 +1138,7 @@ pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { +pub fn _mm_set1_epi16(a: i16) -> __m128i { _mm_set_epi16(a, a, a, a, a, a, a, a) } @@ -1111,7 +1149,7 @@ pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { +pub fn _mm_set1_epi8(a: i8) -> __m128i { _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } @@ -1122,7 +1160,7 @@ pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { +pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { _mm_set_epi32(e0, e1, e2, e3) } @@ -1133,7 +1171,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setr_epi16( +pub fn _mm_setr_epi16( e7: i16, e6: i16, e5: i16, @@ -1153,7 +1191,7 @@ pub unsafe fn _mm_setr_epi16( #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setr_epi8( +pub fn _mm_setr_epi8( e15: i8, e14: i8, e13: i8, @@ -1184,8 +1222,8 @@ pub unsafe fn _mm_setr_epi8( #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setzero_si128() -> __m128i { - const { mem::zeroed() } +pub fn _mm_setzero_si128() -> __m128i { + const { unsafe { mem::zeroed() } } } /// Loads 64-bit integer from memory into first element of returned vector. 
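The `_mm_set*` constructors above differ only in argument order: `set` takes the highest lane first, `setr` the lowest. A quick sketch with a made-up helper name:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_set_order() {
    use std::arch::x86_64::*;
    // `set` lists lanes from high to low, `setr` from low to high.
    let hi_first = _mm_set_epi32(3, 2, 1, 0);
    let lo_first = _mm_setr_epi32(0, 1, 2, 3);
    let same = _mm_movemask_epi8(_mm_cmpeq_epi8(hi_first, lo_first));
    assert_eq!(same, 0xFFFF);
    // An all-zero register compares equal to a zero splat.
    let z = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_setzero_si128(), _mm_set1_epi32(0)));
    assert_eq!(z, 0xFFFF);
}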
@@ -1352,9 +1390,11 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { assert_instr(movq) )] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { - let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); - transmute(r) +pub fn _mm_move_epi64(a: __m128i) -> __m128i { + unsafe { + let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); + transmute(r) + } } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers @@ -1365,8 +1405,8 @@ pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packsswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(packsswb(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -1377,8 +1417,8 @@ pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packssdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(packssdw(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers @@ -1389,8 +1429,8 @@ pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packuswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(packuswb(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } } /// Returns the `imm8` element of `a`. @@ -1401,9 +1441,9 @@ pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_epi16(a: __m128i) -> i32 { +pub fn _mm_extract_epi16(a: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 3); - simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 + unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 } } /// Returns a new vector where the `imm8` element of `a` is replaced with `i`. @@ -1414,9 +1454,9 @@ pub unsafe fn _mm_extract_epi16(a: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i { +pub fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 3); - transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) + unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) } } /// Returns a mask of the most significant bit of each element in `a`. 
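A sketch of the pack/extract/insert wrappers above (helper name is illustrative): packing narrows with saturation, and the lane index `IMM8` is a const generic.

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_pack_extract_insert() {
    use std::arch::x86_64::*;
    // A 16-bit 300 does not fit in i8, so signed packing saturates it to 127...
    let wide = _mm_set1_epi16(300);
    let packed = _mm_packs_epi16(wide, wide);
    assert_eq!(_mm_extract_epi16::<0>(packed) & 0xFF, 127);
    // ...while unsigned packing saturates to 255.
    let upacked = _mm_packus_epi16(wide, wide);
    assert_eq!(_mm_extract_epi16::<0>(upacked) & 0xFF, 255);
    // Replace lane 7 and read it back.
    let patched = _mm_insert_epi16::<7>(_mm_setzero_si128(), 9);
    assert_eq!(_mm_extract_epi16::<7>(patched), 9);
}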
@@ -1426,10 +1466,12 @@ pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { - let z = i8x16::ZERO; - let m: i8x16 = simd_lt(a.as_i8x16(), z); - simd_bitmask::<_, u16>(m) as u32 as i32 +pub fn _mm_movemask_epi8(a: __m128i) -> i32 { + unsafe { + let z = i8x16::ZERO; + let m: i8x16 = simd_lt(a.as_i8x16(), z); + simd_bitmask::<_, u16>(m) as u32 as i32 + } } /// Shuffles 32-bit integers in `a` using the control in `IMM8`. @@ -1440,20 +1482,22 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { +pub fn _mm_shuffle_epi32(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i32x4(); - let x: i32x4 = simd_shuffle!( - a, - a, - [ - IMM8 as u32 & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - ], - ); - transmute(x) + unsafe { + let a = a.as_i32x4(); + let x: i32x4 = simd_shuffle!( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + ], + ); + transmute(x) + } } /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in @@ -1468,24 +1512,26 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { +pub fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x8(); - let x: i16x8 = simd_shuffle!( - a, - a, - [ - 0, - 1, - 2, - 3, - (IMM8 as u32 & 0b11) + 4, - ((IMM8 as u32 >> 2) & 0b11) + 4, - ((IMM8 as u32 >> 4) & 0b11) + 4, - ((IMM8 as u32 >> 6) & 0b11) + 4, - ], - ); - transmute(x) + unsafe { + let a = a.as_i16x8(); + let x: i16x8 = simd_shuffle!( + a, + a, + [ + 0, + 1, + 2, + 3, + (IMM8 as u32 & 0b11) + 4, + ((IMM8 as u32 >> 2) & 0b11) + 4, + ((IMM8 as u32 >> 4) & 0b11) + 4, + ((IMM8 as u32 >> 6) & 0b11) + 4, + ], + ); + transmute(x) + } } /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in @@ -1500,24 +1546,26 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { +pub fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - let a = a.as_i16x8(); - let x: i16x8 = simd_shuffle!( - a, - a, - [ - IMM8 as u32 & 0b11, - (IMM8 as u32 >> 2) & 0b11, - (IMM8 as u32 >> 4) & 0b11, - (IMM8 as u32 >> 6) & 0b11, - 4, - 5, - 6, - 7, - ], - ); - transmute(x) + unsafe { + let a = a.as_i16x8(); + let x: i16x8 = simd_shuffle!( + a, + a, + [ + IMM8 as u32 & 0b11, + (IMM8 as u32 >> 2) & 0b11, + (IMM8 as u32 >> 4) & 0b11, + (IMM8 as u32 >> 6) & 0b11, + 4, + 5, + 6, + 7, + ], + ); + transmute(x) + } } /// Unpacks and interleave 8-bit integers from the high half of `a` and `b`. 
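A sketch of the movemask/shuffle wrappers above; the helper name is invented, and the shuffle immediate encodes two source-lane bits per destination lane:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_movemask_shuffle() {
    use std::arch::x86_64::*;
    // One sign bit per byte: only the lowest byte is negative here.
    let m = _mm_movemask_epi8(_mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1));
    assert_eq!(m, 0b1);
    // 0b00_01_10_11 picks source lanes 3,2,1,0, i.e. reverses the 32-bit lanes.
    let reversed = _mm_shuffle_epi32::<0b00_01_10_11>(_mm_set_epi32(3, 2, 1, 0));
    assert_eq!(_mm_cvtsi128_si32(reversed), 3);
}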
@@ -1527,12 +1575,14 @@ pub unsafe fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle!( - a.as_i8x16(), - b.as_i8x16(), - [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], - )) +pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + transmute::(simd_shuffle!( + a.as_i8x16(), + b.as_i8x16(), + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], + )) + } } /// Unpacks and interleave 16-bit integers from the high half of `a` and `b`. @@ -1542,9 +1592,11 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { - let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); - transmute::(x) +pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); + transmute::(x) + } } /// Unpacks and interleave 32-bit integers from the high half of `a` and `b`. @@ -1554,8 +1606,8 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) +pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) } } /// Unpacks and interleave 64-bit integers from the high half of `a` and `b`. @@ -1565,8 +1617,8 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) +pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) } } /// Unpacks and interleave 8-bit integers from the low half of `a` and `b`. @@ -1576,12 +1628,14 @@ pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle!( - a.as_i8x16(), - b.as_i8x16(), - [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], - )) +pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + transmute::(simd_shuffle!( + a.as_i8x16(), + b.as_i8x16(), + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], + )) + } } /// Unpacks and interleave 16-bit integers from the low half of `a` and `b`. 
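The interleaves above are also the usual way to widen unsigned bytes: interleaving with a zero register zero-extends each byte to 16 bits. A sketch, with an invented helper name:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_widen_u8_to_u16() {
    use std::arch::x86_64::*;
    let bytes = _mm_set1_epi8(-56); // every byte holds 200u8
    let zero = _mm_setzero_si128();
    // Low half: bytes 0..7 interleaved with zeros become eight u16 lanes of 200.
    let lo = _mm_unpacklo_epi8(bytes, zero);
    // High half: bytes 8..15 get the same treatment.
    let hi = _mm_unpackhi_epi8(bytes, zero);
    assert_eq!(_mm_extract_epi16::<0>(lo), 200);
    assert_eq!(_mm_extract_epi16::<7>(hi), 200);
}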
@@ -1591,9 +1645,11 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { - let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); - transmute::(x) +pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); + transmute::(x) + } } /// Unpacks and interleave 32-bit integers from the low half of `a` and `b`. @@ -1603,8 +1659,8 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) +pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) } } /// Unpacks and interleave 64-bit integers from the low half of `a` and `b`. @@ -1614,8 +1670,8 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute::(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) +pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute::(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) } } /// Returns a new vector with the low element of `a` replaced by the sum of the @@ -1626,8 +1682,8 @@ pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(addsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) +pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } } /// Adds packed double-precision (64-bit) floating-point elements in `a` and @@ -1638,8 +1694,8 @@ pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(addpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { - simd_add(a, b) +pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_add(a, b) } } /// Returns a new vector with the low element of `a` replaced by the result of @@ -1650,8 +1706,8 @@ pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(divsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) +pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } } /// Divide packed double-precision (64-bit) floating-point elements in `a` by @@ -1662,8 +1718,8 @@ pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(divpd))] #[stable(feature = "simd_x86", 
since = "1.27.0")] -pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { - simd_div(a, b) +pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_div(a, b) } } /// Returns a new vector with the low element of `a` replaced by the maximum @@ -1674,8 +1730,8 @@ pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { - maxsd(a, b) +pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { maxsd(a, b) } } /// Returns a new vector with the maximum values from corresponding elements in @@ -1686,8 +1742,8 @@ pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maxpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { - maxpd(a, b) +pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { maxpd(a, b) } } /// Returns a new vector with the low element of `a` replaced by the minimum @@ -1698,8 +1754,8 @@ pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(minsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { - minsd(a, b) +pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { minsd(a, b) } } /// Returns a new vector with the minimum values from corresponding elements in @@ -1710,8 +1766,8 @@ pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(minpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { - minpd(a, b) +pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { minpd(a, b) } } /// Returns a new vector with the low element of `a` replaced by multiplying the @@ -1722,8 +1778,8 @@ pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mulsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) +pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` @@ -1734,8 +1790,8 @@ pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mulpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { - simd_mul(a, b) +pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_mul(a, b) } } /// Returns a new vector with the low element of `a` replaced by the square @@ -1746,8 +1802,8 @@ pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(sqrtsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) +pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) } } /// Returns a new vector with the square root of each of the values in `a`. 
@@ -1757,8 +1813,8 @@ pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(sqrtpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { - simd_fsqrt(a) +pub fn _mm_sqrt_pd(a: __m128d) -> __m128d { + unsafe { simd_fsqrt(a) } } /// Returns a new vector with the low element of `a` replaced by subtracting the @@ -1769,8 +1825,8 @@ pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(subsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) +pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } } /// Subtract packed double-precision (64-bit) floating-point elements in `b` @@ -1781,8 +1837,8 @@ pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(subpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { - simd_sub(a, b) +pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_sub(a, b) } } /// Computes the bitwise AND of packed double-precision (64-bit) floating-point @@ -1793,10 +1849,12 @@ pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { - let a: __m128i = transmute(a); - let b: __m128i = transmute(b); - transmute(_mm_and_si128(a, b)) +pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_and_si128(a, b)) + } } /// Computes the bitwise NOT of `a` and then AND with `b`. @@ -1806,10 +1864,12 @@ pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andnps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { - let a: __m128i = transmute(a); - let b: __m128i = transmute(b); - transmute(_mm_andnot_si128(a, b)) +pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_andnot_si128(a, b)) + } } /// Computes the bitwise OR of `a` and `b`. @@ -1819,10 +1879,12 @@ pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(orps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { - let a: __m128i = transmute(a); - let b: __m128i = transmute(b); - transmute(_mm_or_si128(a, b)) +pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_or_si128(a, b)) + } } /// Computes the bitwise XOR of `a` and `b`. 
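Because the double-precision logic ops above simply forward to the integer ones, they support the usual sign-bit tricks. A sketch with an invented helper name:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_sign_bit_tricks() {
    use std::arch::x86_64::*;
    let sign_bit = _mm_set1_pd(-0.0); // only the sign bit set in each lane
    let x = _mm_set_pd(-2.5, 3.5);
    // abs: clear the sign bit; andnot computes `!sign_bit & x`.
    let abs = _mm_andnot_pd(sign_bit, x);
    assert_eq!(_mm_cvtsd_f64(abs), 3.5);
    // negate: flip the sign bit.
    let neg = _mm_xor_pd(x, sign_bit);
    assert_eq!(_mm_cvtsd_f64(neg), -3.5);
}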
@@ -1832,10 +1894,12 @@ pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { - let a: __m128i = transmute(a); - let b: __m128i = transmute(b); - transmute(_mm_xor_si128(a, b)) +pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a: __m128i = transmute(a); + let b: __m128i = transmute(b); + transmute(_mm_xor_si128(a, b)) + } } /// Returns a new vector with the low element of `a` replaced by the equality @@ -1846,8 +1910,8 @@ pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpeqsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 0) +pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 0) } } /// Returns a new vector with the low element of `a` replaced by the less-than @@ -1858,8 +1922,8 @@ pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 1) +pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 1) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1870,8 +1934,8 @@ pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplesd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 2) +pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 2) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1882,8 +1946,8 @@ pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) +pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1894,8 +1958,8 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplesd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) +pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) } } /// Returns a new vector with the low element of `a` replaced by the result @@ -1908,8 +1972,8 @@ pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpordsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 7) +pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 7) } } /// Returns a new vector with the low element of `a` replaced by the result of @@ -1921,8 
+1985,8 @@ pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpunordsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 3) +pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 3) } } /// Returns a new vector with the low element of `a` replaced by the not-equal @@ -1933,8 +1997,8 @@ pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpneqsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 4) +pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 4) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1945,8 +2009,8 @@ pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 5) +pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 5) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1957,8 +2021,8 @@ pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlesd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { - cmpsd(a, b, 6) +pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmpsd(a, b, 6) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1969,8 +2033,8 @@ pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) +pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) } } /// Returns a new vector with the low element of `a` replaced by the @@ -1981,8 +2045,8 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlesd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { - simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) +pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) } } /// Compares corresponding elements in `a` and `b` for equality. @@ -1992,8 +2056,8 @@ pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpeqpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 0) +pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 0) } } /// Compares corresponding elements in `a` and `b` for less-than. 
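The scalar `cmp*_sd` wrappers above return an all-ones or all-zero bit pattern in the low lane (the high lane is passed through from `a`), which `_mm_movemask_pd` can read out. A sketch with an invented helper name:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_scalar_compares() {
    use std::arch::x86_64::*;
    let a = _mm_set_sd(1.0);
    let b = _mm_set_sd(2.0);
    // 1.0 < 2.0: the low lane is all ones, so bit 0 of the mask is set.
    let lt = _mm_cmplt_sd(a, b);
    assert_eq!(_mm_movemask_pd(lt) & 1, 1);
    // 1.0 >= 2.0 is false: the low lane is all zeros.
    let ge = _mm_cmpge_sd(a, b);
    assert_eq!(_mm_movemask_pd(ge) & 1, 0);
}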
@@ -2003,8 +2067,8 @@ pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 1) +pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 1) } } /// Compares corresponding elements in `a` and `b` for less-than-or-equal @@ -2014,8 +2078,8 @@ pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplepd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 2) +pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 2) } } /// Compares corresponding elements in `a` and `b` for greater-than. @@ -2025,7 +2089,7 @@ pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmplt_pd(b, a) } @@ -2036,7 +2100,7 @@ pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplepd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmple_pd(b, a) } @@ -2047,8 +2111,8 @@ pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpordpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 7) +pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 7) } } /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. @@ -2058,8 +2122,8 @@ pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpunordpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 3) +pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 3) } } /// Compares corresponding elements in `a` and `b` for not-equal. @@ -2069,8 +2133,8 @@ pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpneqpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 4) +pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 4) } } /// Compares corresponding elements in `a` and `b` for not-less-than. @@ -2080,8 +2144,8 @@ pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 5) +pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 5) } } /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. 
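The packed predicates above treat NaN as unordered: every ordered comparison involving a NaN lane is false, while `_mm_cmpunord_pd` flags exactly the lanes that involve a NaN. A sketch (helper name invented):

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_packed_compares() {
    use std::arch::x86_64::*;
    let a = _mm_set_pd(f64::NAN, 1.0); // high lane NaN, low lane 1.0
    let b = _mm_set1_pd(2.0);
    // Less-than holds only in the low lane; the NaN lane compares false.
    assert_eq!(_mm_movemask_pd(_mm_cmplt_pd(a, b)), 0b01);
    // The unordered predicate flags the NaN lane instead.
    assert_eq!(_mm_movemask_pd(_mm_cmpunord_pd(a, b)), 0b10);
}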
@@ -2091,8 +2155,8 @@ pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlepd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { - cmppd(a, b, 6) +pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { cmppd(a, b, 6) } } /// Compares corresponding elements in `a` and `b` for not-greater-than. @@ -2102,7 +2166,7 @@ pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnlt_pd(b, a) } @@ -2114,7 +2178,7 @@ pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlepd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnle_pd(b, a) } @@ -2125,8 +2189,8 @@ pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { - comieqsd(a, b) +pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comieqsd(a, b) } } /// Compares the lower element of `a` and `b` for less-than. @@ -2136,8 +2200,8 @@ pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { - comiltsd(a, b) +pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comiltsd(a, b) } } /// Compares the lower element of `a` and `b` for less-than-or-equal. @@ -2147,8 +2211,8 @@ pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { - comilesd(a, b) +pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comilesd(a, b) } } /// Compares the lower element of `a` and `b` for greater-than. @@ -2158,8 +2222,8 @@ pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { - comigtsd(a, b) +pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comigtsd(a, b) } } /// Compares the lower element of `a` and `b` for greater-than-or-equal. @@ -2169,8 +2233,8 @@ pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { - comigesd(a, b) +pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comigesd(a, b) } } /// Compares the lower element of `a` and `b` for not-equal. 
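Unlike the mask-producing `cmp*` family, the `comi*` wrappers above compare just the low lanes and return a plain 0/1 `i32`, so they slot straight into ordinary control flow. A small sketch; the function name is invented:

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn low_lane_min(a: __m128d, b: __m128d) -> f64 {
    // Branch directly on the scalar comparison result.
    if _mm_comile_sd(a, b) != 0 {
        _mm_cvtsd_f64(a)
    } else {
        _mm_cvtsd_f64(b)
    }
}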
@@ -2180,8 +2244,8 @@ pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { - comineqsd(a, b) +pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { comineqsd(a, b) } } /// Compares the lower element of `a` and `b` for equality. @@ -2191,8 +2255,8 @@ pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { - ucomieqsd(a, b) +pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomieqsd(a, b) } } /// Compares the lower element of `a` and `b` for less-than. @@ -2202,8 +2266,8 @@ pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { - ucomiltsd(a, b) +pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomiltsd(a, b) } } /// Compares the lower element of `a` and `b` for less-than-or-equal. @@ -2213,8 +2277,8 @@ pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { - ucomilesd(a, b) +pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomilesd(a, b) } } /// Compares the lower element of `a` and `b` for greater-than. @@ -2224,8 +2288,8 @@ pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { - ucomigtsd(a, b) +pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomigtsd(a, b) } } /// Compares the lower element of `a` and `b` for greater-than-or-equal. @@ -2235,8 +2299,8 @@ pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { - ucomigesd(a, b) +pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomigesd(a, b) } } /// Compares the lower element of `a` and `b` for not-equal. 
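From Rust's point of view the `ucomi*` wrappers above are used the same way as the `comi*` ones (the hardware difference is only in how they signal on quiet NaNs). A tiny sketch with an invented helper name:

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn demo_ucomi() {
    use std::arch::x86_64::*;
    let smaller = _mm_set_sd(1.5);
    let larger = _mm_set_sd(2.5);
    // The result is an ordinary i32 "boolean".
    assert_eq!(_mm_ucomilt_sd(smaller, larger), 1);
    assert_eq!(_mm_ucomigt_sd(smaller, larger), 0);
}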
@@ -2246,8 +2310,8 @@ pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { - ucomineqsd(a, b) +pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { + unsafe { ucomineqsd(a, b) } } /// Converts packed double-precision (64-bit) floating-point elements in `a` to @@ -2258,10 +2322,12 @@ pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtpd2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { - let r = simd_cast::<_, f32x2>(a.as_f64x2()); - let zero = f32x2::ZERO; - transmute::(simd_shuffle!(r, zero, [0, 1, 2, 3])) +pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 { + unsafe { + let r = simd_cast::<_, f32x2>(a.as_f64x2()); + let zero = f32x2::ZERO; + transmute::(simd_shuffle!(r, zero, [0, 1, 2, 3])) + } } /// Converts packed single-precision (32-bit) floating-point elements in `a` to @@ -2273,9 +2339,11 @@ pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtps2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { - let a = a.as_f32x4(); - transmute(simd_cast::(simd_shuffle!(a, a, [0, 1]))) +pub fn _mm_cvtps_pd(a: __m128) -> __m128d { + unsafe { + let a = a.as_f32x4(); + transmute(simd_cast::(simd_shuffle!(a, a, [0, 1]))) + } } /// Converts packed double-precision (64-bit) floating-point elements in `a` to @@ -2286,8 +2354,8 @@ pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { - transmute(cvtpd2dq(a)) +pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { + unsafe { transmute(cvtpd2dq(a)) } } /// Converts the lower double-precision (64-bit) floating-point element in a to @@ -2298,8 +2366,8 @@ pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { - cvtsd2si(a) +pub fn _mm_cvtsd_si32(a: __m128d) -> i32 { + unsafe { cvtsd2si(a) } } /// Converts the lower double-precision (64-bit) floating-point element in `b` @@ -2312,8 +2380,8 @@ pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { - cvtsd2ss(a, b) +pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { + unsafe { cvtsd2ss(a, b) } } /// Returns the lower double-precision (64-bit) floating-point element of `a`. 
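A sketch of the conversion wrappers in use. `_mm_cvtsd_si32` rounds according to the current MXCSR rounding mode (round-to-nearest-even unless the program has changed it); the truncating `_mm_cvttsd_si32` variant follows in the hunks below. The helper name is ours:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
fn round_f64_to_i32(x: f64) -> i32 {
    use std::arch::x86_64::{_mm_cvtsd_si32, _mm_set_sd};
    // Rounds using the current MXCSR mode; with the default FP state,
    // 2.5 converts to 2 (round-to-nearest-even).
    _mm_cvtsd_si32(_mm_set_sd(x))
}
```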
@@ -2322,8 +2390,8 @@ pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { - simd_extract!(a, 0) +pub fn _mm_cvtsd_f64(a: __m128d) -> f64 { + unsafe { simd_extract!(a, 0) } } /// Converts the lower single-precision (32-bit) floating-point element in `b` @@ -2336,8 +2404,8 @@ pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtss2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { - cvtss2sd(a, b) +pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { + unsafe { cvtss2sd(a, b) } } /// Converts packed double-precision (64-bit) floating-point elements in `a` to @@ -2348,8 +2416,8 @@ pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { - transmute(cvttpd2dq(a)) +pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { + unsafe { transmute(cvttpd2dq(a)) } } /// Converts the lower double-precision (64-bit) floating-point element in `a` @@ -2360,8 +2428,8 @@ pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { - cvttsd2si(a) +pub fn _mm_cvttsd_si32(a: __m128d) -> i32 { + unsafe { cvttsd2si(a) } } /// Converts packed single-precision (32-bit) floating-point elements in `a` to @@ -2372,8 +2440,8 @@ pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { - transmute(cvttps2dq(a)) +pub fn _mm_cvttps_epi32(a: __m128) -> __m128i { + unsafe { transmute(cvttps2dq(a)) } } /// Copies double-precision (64-bit) floating-point element `a` to the lower @@ -2383,7 +2451,7 @@ pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_sd(a: f64) -> __m128d { +pub fn _mm_set_sd(a: f64) -> __m128d { _mm_set_pd(0.0, a) } @@ -2394,7 +2462,7 @@ pub unsafe fn _mm_set_sd(a: f64) -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { +pub fn _mm_set1_pd(a: f64) -> __m128d { _mm_set_pd(a, a) } @@ -2405,7 +2473,7 @@ pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { +pub fn _mm_set_pd1(a: f64) -> __m128d { _mm_set_pd(a, a) } @@ -2416,7 +2484,7 @@ pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { +pub fn _mm_set_pd(a: f64, b: f64) -> __m128d { __m128d([b, a]) } @@ -2427,7 +2495,7 @@ pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { +pub fn _mm_setr_pd(a: f64, 
b: f64) -> __m128d { _mm_set_pd(b, a) } @@ -2439,8 +2507,8 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorp))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_setzero_pd() -> __m128d { - const { mem::zeroed() } +pub fn _mm_setzero_pd() -> __m128d { + const { unsafe { mem::zeroed() } } } /// Returns a mask of the most significant bit of each element in `a`. @@ -2453,11 +2521,13 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { +pub fn _mm_movemask_pd(a: __m128d) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. - let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO); - simd_bitmask::(mask).into() + unsafe { + let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO); + simd_bitmask::(mask).into() + } } /// Loads 128-bits (composed of 2 packed double-precision (64-bit) @@ -2802,9 +2872,9 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i { #[cfg_attr(test, assert_instr(shufps, MASK = 2))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(MASK, 8); - simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) + unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) } } /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower @@ -2816,8 +2886,8 @@ pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d) -> __m128d #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { - _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) +pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) } } /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit @@ -2827,8 +2897,8 @@ pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { - transmute(a) +pub fn _mm_castpd_ps(a: __m128d) -> __m128 { + unsafe { transmute(a) } } /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit @@ -2838,8 +2908,8 @@ pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { - transmute(a) +pub fn _mm_castpd_si128(a: __m128d) -> __m128i { + unsafe { transmute(a) } } /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit @@ -2849,8 +2919,8 @@ pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { - transmute(a) +pub fn _mm_castps_pd(a: __m128) -> __m128d { + unsafe { transmute(a) } } /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit @@ -2860,8 +2930,8 @@ pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { #[inline] #[target_feature(enable = "sse2")] 
#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { - transmute(a) +pub fn _mm_castps_si128(a: __m128) -> __m128i { + unsafe { transmute(a) } } /// Casts a 128-bit integer vector into a 128-bit floating-point vector @@ -2871,8 +2941,8 @@ pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { - transmute(a) +pub fn _mm_castsi128_pd(a: __m128i) -> __m128d { + unsafe { transmute(a) } } /// Casts a 128-bit integer vector into a 128-bit floating-point vector @@ -2882,8 +2952,8 @@ pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { - transmute(a) +pub fn _mm_castsi128_ps(a: __m128i) -> __m128 { + unsafe { transmute(a) } } /// Returns vector of type __m128d with indeterminate elements. @@ -2894,8 +2964,8 @@ pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_undefined_pd() -> __m128d { - const { mem::zeroed() } +pub fn _mm_undefined_pd() -> __m128d { + const { unsafe { mem::zeroed() } } } /// Returns vector of type __m128i with indeterminate elements. @@ -2906,8 +2976,8 @@ pub unsafe fn _mm_undefined_pd() -> __m128d { #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_undefined_si128() -> __m128i { - const { mem::zeroed() } +pub fn _mm_undefined_si128() -> __m128i { + const { unsafe { mem::zeroed() } } } /// The resulting `__m128d` element is composed by the low-order values of @@ -2921,8 +2991,8 @@ pub unsafe fn _mm_undefined_si128() -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { - simd_shuffle!(a, b, [1, 3]) +pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, b, [1, 3]) } } /// The resulting `__m128d` element is composed by the high-order values of @@ -2936,8 +3006,8 @@ pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { - simd_shuffle!(a, b, [0, 2]) +pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, b, [0, 2]) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/sse3.rs b/crates/core_arch/src/x86/sse3.rs index 35960441fd..7a32cfe472 100644 --- a/crates/core_arch/src/x86/sse3.rs +++ b/crates/core_arch/src/x86/sse3.rs @@ -14,12 +14,14 @@ use stdarch_test::assert_instr; #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(addsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 { - let a = a.as_f32x4(); - let b = b.as_f32x4(); - let add = simd_add(a, b); - let sub = simd_sub(a, b); - simd_shuffle!(add, sub, [4, 1, 6, 3]) +pub fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 { + unsafe { + let a = a.as_f32x4(); + let b = b.as_f32x4(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + 
simd_shuffle!(add, sub, [4, 1, 6, 3]) + } } /// Alternatively add and subtract packed double-precision (64-bit) @@ -30,12 +32,14 @@ pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(addsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { - let a = a.as_f64x2(); - let b = b.as_f64x2(); - let add = simd_add(a, b); - let sub = simd_sub(a, b); - simd_shuffle!(add, sub, [2, 1]) +pub fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + let a = a.as_f64x2(); + let b = b.as_f64x2(); + let add = simd_add(a, b); + let sub = simd_sub(a, b); + simd_shuffle!(add, sub, [2, 1]) + } } /// Horizontally adds adjacent pairs of double-precision (64-bit) @@ -46,8 +50,8 @@ pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(haddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { - haddpd(a, b) +pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { haddpd(a, b) } } /// Horizontally adds adjacent pairs of single-precision (32-bit) @@ -58,8 +62,8 @@ pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(haddps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { - haddps(a, b) +pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { + unsafe { haddps(a, b) } } /// Horizontally subtract adjacent pairs of double-precision (64-bit) @@ -70,8 +74,8 @@ pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(hsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { - hsubpd(a, b) +pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { hsubpd(a, b) } } /// Horizontally adds adjacent pairs of single-precision (32-bit) @@ -82,8 +86,8 @@ pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(hsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { - hsubps(a, b) +pub fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { + unsafe { hsubps(a, b) } } /// Loads 128-bits of integer data from unaligned memory. 
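A sketch of how the horizontal-add wrappers compose once they are safe: two `_mm_hadd_ps` passes reduce four lanes to a scalar. This assumes `_mm_cvtss_f32` from the `sse` module receives the same treatment elsewhere in this change; the helper name is ours:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse3")]
fn hsum4(v: std::arch::x86_64::__m128) -> f32 {
    use std::arch::x86_64::{_mm_cvtss_f32, _mm_hadd_ps};
    let t = _mm_hadd_ps(v, v); // [a+b, c+d, a+b, c+d]
    let t = _mm_hadd_ps(t, t); // a+b+c+d in every lane
    _mm_cvtss_f32(t)           // read lane 0
}
```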
@@ -107,8 +111,8 @@ pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movddup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d { - simd_shuffle!(a, a, [0, 0]) +pub fn _mm_movedup_pd(a: __m128d) -> __m128d { + unsafe { simd_shuffle!(a, a, [0, 0]) } } /// Loads a double-precision (64-bit) floating-point element from memory @@ -131,8 +135,8 @@ pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movshdup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 { - simd_shuffle!(a, a, [1, 1, 3, 3]) +pub fn _mm_movehdup_ps(a: __m128) -> __m128 { + unsafe { simd_shuffle!(a, a, [1, 1, 3, 3]) } } /// Duplicate even-indexed single-precision (32-bit) floating-point elements @@ -143,8 +147,8 @@ pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movsldup))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 { - simd_shuffle!(a, a, [0, 0, 2, 2]) +pub fn _mm_moveldup_ps(a: __m128) -> __m128 { + unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index 21a580e44f..1970bdf7b0 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -59,9 +59,11 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { - let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO); - transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16())) +pub fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { + unsafe { + let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO); + transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16())) + } } /// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`. 
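The `moveldup`/`movehdup`/`addsub` trio exists mostly for complex arithmetic, so here is a sketch of packed complex multiplication built from the safe wrappers above, with lanes laid out as `[re0, im0, re1, im1]`. The helper name and layout are ours, and `_mm_mul_ps`/`_mm_shuffle_ps` are assumed to be converted the same way elsewhere in this change:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse3")]
fn complex_mul(
    a: std::arch::x86_64::__m128,
    b: std::arch::x86_64::__m128,
) -> std::arch::x86_64::__m128 {
    use std::arch::x86_64::{
        _mm_addsub_ps, _mm_movehdup_ps, _mm_moveldup_ps, _mm_mul_ps, _mm_shuffle_ps,
    };
    let re = _mm_moveldup_ps(a); // [ar0, ar0, ar1, ar1]
    let im = _mm_movehdup_ps(a); // [ai0, ai0, ai1, ai1]
    let b_swapped = _mm_shuffle_ps(b, b, 0b10_11_00_01); // [bi0, br0, bi1, br1]
    // [ar*br - ai*bi, ar*bi + ai*br, ...] per complex pair
    _mm_addsub_ps(_mm_mul_ps(re, b), _mm_mul_ps(im, b_swapped))
}
```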
@@ -76,22 +78,24 @@ pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i #[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_blend_epi16(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute::(simd_shuffle!( - a.as_i16x8(), - b.as_i16x8(), - [ - [0, 8][IMM8 as usize & 1], - [1, 9][(IMM8 >> 1) as usize & 1], - [2, 10][(IMM8 >> 2) as usize & 1], - [3, 11][(IMM8 >> 3) as usize & 1], - [4, 12][(IMM8 >> 4) as usize & 1], - [5, 13][(IMM8 >> 5) as usize & 1], - [6, 14][(IMM8 >> 6) as usize & 1], - [7, 15][(IMM8 >> 7) as usize & 1], - ] - )) + unsafe { + transmute::(simd_shuffle!( + a.as_i16x8(), + b.as_i16x8(), + [ + [0, 8][IMM8 as usize & 1], + [1, 9][(IMM8 >> 1) as usize & 1], + [2, 10][(IMM8 >> 2) as usize & 1], + [3, 11][(IMM8 >> 3) as usize & 1], + [4, 12][(IMM8 >> 4) as usize & 1], + [5, 13][(IMM8 >> 5) as usize & 1], + [6, 14][(IMM8 >> 6) as usize & 1], + [7, 15][(IMM8 >> 7) as usize & 1], + ] + )) + } } /// Blend packed double-precision (64-bit) floating-point elements from `a` @@ -102,9 +106,11 @@ pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendvpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { - let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO); - transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2())) +pub fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { + unsafe { + let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO); + transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2())) + } } /// Blend packed single-precision (32-bit) floating-point elements from `a` @@ -115,9 +121,11 @@ pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendvps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { - let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO); - transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4())) +pub fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { + unsafe { + let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO); + transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4())) + } } /// Blend packed double-precision (64-bit) floating-point elements from `a` @@ -132,13 +140,15 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { #[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_blend_pd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(IMM2, 2); - transmute::(simd_shuffle!( - a.as_f64x2(), - b.as_f64x2(), - [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]] - )) + unsafe { + transmute::(simd_shuffle!( + a.as_f64x2(), + b.as_f64x2(), + [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]] + )) + } } /// Blend packed single-precision (32-bit) floating-point elements from `a` @@ -150,18 +160,20 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(blendps, IMM4 = 
0b0101))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_blend_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_blend_ps(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM4, 4); - transmute::(simd_shuffle!( - a.as_f32x4(), - b.as_f32x4(), - [ - [0, 4][IMM4 as usize & 1], - [1, 5][(IMM4 >> 1) as usize & 1], - [2, 6][(IMM4 >> 2) as usize & 1], - [3, 7][(IMM4 >> 3) as usize & 1], - ] - )) + unsafe { + transmute::(simd_shuffle!( + a.as_f32x4(), + b.as_f32x4(), + [ + [0, 4][IMM4 as usize & 1], + [1, 5][(IMM4 >> 1) as usize & 1], + [2, 6][(IMM4 >> 2) as usize & 1], + [3, 7][(IMM4 >> 3) as usize & 1], + ] + )) + } } /// Extracts a single-precision (32-bit) floating-point element from `a`, @@ -194,9 +206,9 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_ps(a: __m128) -> i32 { +pub fn _mm_extract_ps(a: __m128) -> i32 { static_assert_uimm_bits!(IMM8, 2); - simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 + unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 } } /// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit @@ -210,9 +222,9 @@ pub unsafe fn _mm_extract_ps(a: __m128) -> i32 { #[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_epi8(a: __m128i) -> i32 { +pub fn _mm_extract_epi8(a: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 4); - simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 + unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 } } /// Extracts an 32-bit integer from `a` selected with `IMM8` @@ -223,9 +235,9 @@ pub unsafe fn _mm_extract_epi8(a: __m128i) -> i32 { #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_epi32(a: __m128i) -> i32 { +pub fn _mm_extract_epi32(a: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 2); - simd_extract!(a.as_i32x4(), IMM8 as u32, i32) + unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) } } /// Select a single value in `b` to store at some position in `a`, @@ -257,9 +269,9 @@ pub unsafe fn _mm_extract_epi32(a: __m128i) -> i32 { #[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_insert_ps(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); - insertps(a, b, IMM8 as u8) + unsafe { insertps(a, b, IMM8 as u8) } } /// Returns a copy of `a` with the 8-bit integer from `i` inserted at a @@ -271,9 +283,9 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32) -> __m128i { +pub fn _mm_insert_epi8(a: __m128i, i: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 4); - transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) + unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) } } /// Returns a copy of `a` with the 32-bit integer from `i` inserted at a @@ -285,9 +297,9 @@ pub unsafe fn _mm_insert_epi8(a: 
__m128i, i: i32) -> __m128i { #[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32) -> __m128i { +pub fn _mm_insert_epi32(a: __m128i, i: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 2); - transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) + unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) } } /// Compares packed 8-bit integers in `a` and `b` and returns packed maximum @@ -298,10 +310,12 @@ pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i8x16(); - let b = b.as_i8x16(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed @@ -312,10 +326,12 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u16x8(); - let b = b.as_u16x8(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum @@ -326,10 +342,12 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i32x4(); - let b = b.as_i32x4(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed @@ -340,10 +358,12 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u32x4(); - let b = b.as_u32x4(); - transmute(simd_select::(simd_gt(a, b), a, b)) +pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::(simd_gt(a, b), a, b)) + } } /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum @@ -354,10 +374,12 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i8x16(); - let b = b.as_i8x16(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + 
transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed @@ -368,10 +390,12 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u16x8(); - let b = b.as_u16x8(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum @@ -382,10 +406,12 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_i32x4(); - let b = b.as_i32x4(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed @@ -396,10 +422,12 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminud))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { - let a = a.as_u32x4(); - let b = b.as_u32x4(); - transmute(simd_select::(simd_lt(a, b), a, b)) +pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::(simd_lt(a, b), a, b)) + } } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -410,8 +438,8 @@ pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(packusdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(packusdw(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } } /// Compares packed 64-bit integers in `a` and `b` for equality @@ -421,8 +449,8 @@ pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pcmpeqq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } } /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers @@ -432,10 +460,12 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { - let a = a.as_i8x16(); - let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute(simd_cast::<_, i16x8>(a)) +pub fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { + unsafe { + let 
a = a.as_i8x16(); + let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute(simd_cast::<_, i16x8>(a)) + } } /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers @@ -445,10 +475,12 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { - let a = a.as_i8x16(); - let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute(simd_cast::<_, i32x4>(a)) +pub fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } } /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed @@ -459,10 +491,12 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { - let a = a.as_i8x16(); - let a: i8x2 = simd_shuffle!(a, a, [0, 1]); - transmute(simd_cast::<_, i64x2>(a)) +pub fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i8x16(); + let a: i8x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } } /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers @@ -472,10 +506,12 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { - let a = a.as_i16x8(); - let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute(simd_cast::<_, i32x4>(a)) +pub fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } } /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers @@ -485,10 +521,12 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { - let a = a.as_i16x8(); - let a: i16x2 = simd_shuffle!(a, a, [0, 1]); - transmute(simd_cast::<_, i64x2>(a)) +pub fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i16x8(); + let a: i16x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } } /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers @@ -498,10 +536,12 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { - let a = a.as_i32x4(); - let a: i32x2 = simd_shuffle!(a, a, [0, 1]); - transmute(simd_cast::<_, i64x2>(a)) +pub fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_i32x4(); + let a: i32x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } } /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers @@ -511,10 +551,12 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] 
-pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { - let a = a.as_u8x16(); - let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - transmute(simd_cast::<_, i16x8>(a)) +pub fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + transmute(simd_cast::<_, i16x8>(a)) + } } /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers @@ -524,10 +566,12 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { - let a = a.as_u8x16(); - let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute(simd_cast::<_, i32x4>(a)) +pub fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } } /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers @@ -537,10 +581,12 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { - let a = a.as_u8x16(); - let a: u8x2 = simd_shuffle!(a, a, [0, 1]); - transmute(simd_cast::<_, i64x2>(a)) +pub fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u8x16(); + let a: u8x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } } /// Zeroes extend packed unsigned 16-bit integers in `a` @@ -551,10 +597,12 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { - let a = a.as_u16x8(); - let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); - transmute(simd_cast::<_, i32x4>(a)) +pub fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); + transmute(simd_cast::<_, i32x4>(a)) + } } /// Zeroes extend packed unsigned 16-bit integers in `a` @@ -565,10 +613,12 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { - let a = a.as_u16x8(); - let a: u16x2 = simd_shuffle!(a, a, [0, 1]); - transmute(simd_cast::<_, i64x2>(a)) +pub fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u16x8(); + let a: u16x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } } /// Zeroes extend packed unsigned 32-bit integers in `a` @@ -579,10 +629,12 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { - let a = a.as_u32x4(); - let a: u32x2 = simd_shuffle!(a, a, [0, 1]); - transmute(simd_cast::<_, i64x2>(a)) +pub fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { + unsafe { + let a = a.as_u32x4(); + let a: u32x2 = simd_shuffle!(a, a, [0, 1]); + transmute(simd_cast::<_, i64x2>(a)) + } } /// Returns the dot product of two __m128d vectors. 
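A sketch combining two of the safe wrappers in this file: zero-extend the low bytes and pull one widened lane back out, all without `unsafe`. The helper name is ours:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.1")]
fn third_byte_as_u32(bytes: std::arch::x86_64::__m128i) -> u32 {
    use std::arch::x86_64::{_mm_cvtepu8_epi32, _mm_extract_epi32};
    let widened = _mm_cvtepu8_epi32(bytes); // bytes 0..4, zero-extended to i32
    _mm_extract_epi32::<2>(widened) as u32  // lane 2 == original byte 2
}
```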
@@ -599,9 +651,11 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(dppd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d) -> __m128d { - static_assert_uimm_bits!(IMM8, 8); - dppd(a, b, IMM8 as u8) +pub fn _mm_dp_pd(a: __m128d, b: __m128d) -> __m128d { + unsafe { + static_assert_uimm_bits!(IMM8, 8); + dppd(a, b, IMM8 as u8) + } } /// Returns the dot product of two __m128 vectors. @@ -618,9 +672,9 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(dpps, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_dp_ps(a: __m128, b: __m128) -> __m128 { +pub fn _mm_dp_ps(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); - dpps(a, b, IMM8 as u8) + unsafe { dpps(a, b, IMM8 as u8) } } /// Round the packed double-precision (64-bit) floating-point elements in `a` @@ -632,8 +686,8 @@ pub unsafe fn _mm_dp_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { - simd_floor(a) +pub fn _mm_floor_pd(a: __m128d) -> __m128d { + unsafe { simd_floor(a) } } /// Round the packed single-precision (32-bit) floating-point elements in `a` @@ -645,8 +699,8 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { - simd_floor(a) +pub fn _mm_floor_ps(a: __m128) -> __m128 { + unsafe { simd_floor(a) } } /// Round the lower double-precision (64-bit) floating-point element in `b` @@ -660,8 +714,8 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { - roundsd(a, b, _MM_FROUND_FLOOR) +pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { roundsd(a, b, _MM_FROUND_FLOOR) } } /// Round the lower single-precision (32-bit) floating-point element in `b` @@ -675,8 +729,8 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { - roundss(a, b, _MM_FROUND_FLOOR) +pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { + unsafe { roundss(a, b, _MM_FROUND_FLOOR) } } /// Round the packed double-precision (64-bit) floating-point elements in `a` @@ -688,8 +742,8 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { - simd_ceil(a) +pub fn _mm_ceil_pd(a: __m128d) -> __m128d { + unsafe { simd_ceil(a) } } /// Round the packed single-precision (32-bit) floating-point elements in `a` @@ -701,8 +755,8 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { - simd_ceil(a) +pub fn _mm_ceil_ps(a: __m128) -> 
__m128 { + unsafe { simd_ceil(a) } } /// Round the lower double-precision (64-bit) floating-point element in `b` @@ -716,8 +770,8 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { - roundsd(a, b, _MM_FROUND_CEIL) +pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { + unsafe { roundsd(a, b, _MM_FROUND_CEIL) } } /// Round the lower single-precision (32-bit) floating-point element in `b` @@ -731,8 +785,8 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { - roundss(a, b, _MM_FROUND_CEIL) +pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { + unsafe { roundss(a, b, _MM_FROUND_CEIL) } } /// Round the packed double-precision (64-bit) floating-point elements in `a` @@ -752,9 +806,9 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_round_pd(a: __m128d) -> __m128d { +pub fn _mm_round_pd(a: __m128d) -> __m128d { static_assert_uimm_bits!(ROUNDING, 4); - roundpd(a, ROUNDING) + unsafe { roundpd(a, ROUNDING) } } /// Round the packed single-precision (32-bit) floating-point elements in `a` @@ -774,9 +828,9 @@ pub unsafe fn _mm_round_pd(a: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_round_ps(a: __m128) -> __m128 { +pub fn _mm_round_ps(a: __m128) -> __m128 { static_assert_uimm_bits!(ROUNDING, 4); - roundps(a, ROUNDING) + unsafe { roundps(a, ROUNDING) } } /// Round the lower double-precision (64-bit) floating-point element in `b` @@ -798,9 +852,9 @@ pub unsafe fn _mm_round_ps(a: __m128) -> __m128 { #[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d) -> __m128d { +pub fn _mm_round_sd(a: __m128d, b: __m128d) -> __m128d { static_assert_uimm_bits!(ROUNDING, 4); - roundsd(a, b, ROUNDING) + unsafe { roundsd(a, b, ROUNDING) } } /// Round the lower single-precision (32-bit) floating-point element in `b` @@ -822,9 +876,9 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d) -> __m12 #[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_round_ss(a: __m128, b: __m128) -> __m128 { +pub fn _mm_round_ss(a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(ROUNDING, 4); - roundss(a, b, ROUNDING) + unsafe { roundss(a, b, ROUNDING) } } /// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector, @@ -852,8 +906,8 @@ pub unsafe fn _mm_round_ss(a: __m128, b: __m128) -> __m128 #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(phminposuw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { - transmute(phminposuw(a.as_u16x8())) +pub fn _mm_minpos_epu16(a: __m128i) -> __m128i { + unsafe { transmute(phminposuw(a.as_u16x8())) } } /// Multiplies the low 32-bit integers from each packed 64-bit @@ -864,10 
+918,12 @@ pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmuldq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { - let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2())); - let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2())); - transmute(simd_mul(a, b)) +pub fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2())); + let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2())); + transmute(simd_mul(a, b)) + } } /// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate @@ -882,8 +938,8 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmulld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } } /// Subtracts 8-bit unsigned integer values and computes the absolute @@ -924,9 +980,9 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 3); - transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) + unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) } } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -948,8 +1004,8 @@ pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i) -> __m12 #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { - ptestz(a.as_i64x2(), mask.as_i64x2()) +pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { + unsafe { ptestz(a.as_i64x2(), mask.as_i64x2()) } } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -971,8 +1027,8 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestc(a.as_i64x2(), mask.as_i64x2()) +pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { + unsafe { ptestc(a.as_i64x2(), mask.as_i64x2()) } } /// Tests whether the specified bits in a 128-bit integer vector are @@ -994,8 +1050,8 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestnzc(a.as_i64x2(), mask.as_i64x2()) +pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { + unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) } } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -1017,7 +1073,7 @@ pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, 
assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { +pub fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { _mm_testz_si128(a, mask) } @@ -1039,7 +1095,7 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpeqd))] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { +pub fn _mm_test_all_ones(a: __m128i) -> i32 { _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) } @@ -1062,7 +1118,7 @@ pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { +pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { _mm_testnzc_si128(a, mask) } diff --git a/crates/core_arch/src/x86/sse42.rs b/crates/core_arch/src/x86/sse42.rs index 206e9014b0..3639126c12 100644 --- a/crates/core_arch/src/x86/sse42.rs +++ b/crates/core_arch/src/x86/sse42.rs @@ -73,9 +73,9 @@ pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; #[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i) -> __m128i { +pub fn _mm_cmpistrm(a: __m128i, b: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)) + unsafe { transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)) } } /// Compares packed strings with implicit lengths in `a` and `b` using the @@ -261,9 +261,9 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i) -> i32 { +pub fn _mm_cmpistri(a: __m128i, b: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) + unsafe { pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } } /// Compares packed strings with implicit lengths in `a` and `b` using the @@ -276,9 +276,9 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i) -> i32 { +pub fn _mm_cmpistrz(a: __m128i, b: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) + unsafe { pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } } /// Compares packed strings with implicit lengths in `a` and `b` using the @@ -291,9 +291,9 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i) -> i32 { +pub fn _mm_cmpistrc(a: __m128i, b: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) + unsafe { pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } } /// Compares packed strings with implicit lengths in `a` and `b` using the @@ -306,9 +306,9 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i) -> i32 { #[cfg_attr(test, 
assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i) -> i32 { +pub fn _mm_cmpistrs(a: __m128i, b: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) + unsafe { pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } } /// Compares packed strings with implicit lengths in `a` and `b` using the @@ -320,9 +320,9 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i) -> i32 { +pub fn _mm_cmpistro(a: __m128i, b: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) + unsafe { pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } } /// Compares packed strings with implicit lengths in `a` and `b` using the @@ -335,9 +335,9 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i) -> i32 { +pub fn _mm_cmpistra(a: __m128i, b: __m128i) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) + unsafe { pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` @@ -349,9 +349,9 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i { +pub fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i { static_assert_uimm_bits!(IMM8, 8); - transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)) + unsafe { transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)) } } /// Compares packed strings `a` and `b` with lengths `la` and `lb` using the @@ -438,9 +438,9 @@ pub unsafe fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { +pub fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) + unsafe { pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` @@ -453,9 +453,9 @@ pub unsafe fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { +pub fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) + unsafe { pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` @@ -468,9 +468,9 @@ pub unsafe fn _mm_cmpestrz(a: __m128i, la: i32, b: 
__m128i, lb: #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { +pub fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) + unsafe { pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` @@ -483,9 +483,9 @@ pub unsafe fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { +pub fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) + unsafe { pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` @@ -498,9 +498,9 @@ pub unsafe fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { +pub fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) + unsafe { pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` @@ -514,9 +514,9 @@ pub unsafe fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { +pub fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_uimm_bits!(IMM8, 8); - pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) + unsafe { pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } } /// Starting with the initial value in `crc`, return the accumulated @@ -527,8 +527,8 @@ pub unsafe fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { - crc32_32_8(crc, v) +pub fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { + unsafe { crc32_32_8(crc, v) } } /// Starting with the initial value in `crc`, return the accumulated @@ -539,8 +539,8 @@ pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { - crc32_32_16(crc, v) +pub fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { + unsafe { crc32_32_16(crc, v) } } /// Starting with the initial value in `crc`, return the accumulated @@ -551,8 +551,8 @@ pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { - crc32_32_32(crc, v) +pub fn 
_mm_crc32_u32(crc: u32, v: u32) -> u32 { + unsafe { crc32_32_32(crc, v) } } /// Compares packed 64-bit integers in `a` and `b` for greater-than, @@ -563,8 +563,8 @@ pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpgtq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) +pub fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/sse4a.rs b/crates/core_arch/src/x86/sse4a.rs index 16642a0df0..051b77d02d 100644 --- a/crates/core_arch/src/x86/sse4a.rs +++ b/crates/core_arch/src/x86/sse4a.rs @@ -36,8 +36,8 @@ unsafe extern "C" { #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(extrq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { - transmute(extrq(x.as_i64x2(), y.as_i8x16())) +pub fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { + unsafe { transmute(extrq(x.as_i64x2(), y.as_i8x16())) } } /// Extracts the specified bits from the lower 64 bits of the 128-bit integer vector operand at the @@ -53,12 +53,12 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))] #[rustc_legacy_const_generics(1, 2)] #[stable(feature = "simd_x86_updates", since = "1.82.0")] -pub unsafe fn _mm_extracti_si64(x: __m128i) -> __m128i { +pub fn _mm_extracti_si64(x: __m128i) -> __m128i { // LLVM mentions that it is UB if these are not satisfied static_assert_uimm_bits!(LEN, 6); static_assert_uimm_bits!(IDX, 6); static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); - transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) + unsafe { transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) } } /// Inserts the `[length:0]` bits of `y` into `x` at `index`. @@ -74,8 +74,8 @@ pub unsafe fn _mm_extracti_si64(x: __m128i) -> _ #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(insertq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { - transmute(insertq(x.as_i64x2(), y.as_i64x2())) +pub fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { + unsafe { transmute(insertq(x.as_i64x2(), y.as_i64x2())) } } /// Inserts the `len` least-significant bits from the lower 64 bits of the 128-bit integer vector operand `y` into @@ -89,12 +89,12 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))] #[rustc_legacy_const_generics(2, 3)] #[stable(feature = "simd_x86_updates", since = "1.82.0")] -pub unsafe fn _mm_inserti_si64(x: __m128i, y: __m128i) -> __m128i { +pub fn _mm_inserti_si64(x: __m128i, y: __m128i) -> __m128i { // LLVM mentions that it is UB if these are not satisfied static_assert_uimm_bits!(LEN, 6); static_assert_uimm_bits!(IDX, 6); static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); - transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) + unsafe { transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) } } /// Non-temporal store of `a.0` into `p`. 
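A usage sketch (not part of the patch): with the safe signatures above, the SSE4a bit-field intrinsics can be called without an `unsafe` block from inside a function that itself enables `sse4a`. The helper name below is hypothetical, and it assumes an AMD CPU exposing `sse4a` plus a toolchain where safe `#[target_feature]` functions are available (Rust 1.86+).

use core::arch::x86_64::{__m128i, _mm_extracti_si64};

#[target_feature(enable = "sse4a")]
fn low_byte_field(x: __m128i) -> __m128i {
    // Extract an 8-bit field starting at bit 0 of the low 64-bit lane.
    // LEN and IDX are compile-time parameters, checked by the static_asserts in the patch.
    _mm_extracti_si64::<8, 0>(x)
}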
diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs
index ce8d749c80..2be182e88f 100644
--- a/crates/core_arch/src/x86/ssse3.rs
+++ b/crates/core_arch/src/x86/ssse3.rs
@@ -16,11 +16,13 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
-    let a = a.as_i8x16();
-    let zero = i8x16::ZERO;
-    let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
-    transmute(r)
+pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i8x16();
+        let zero = i8x16::ZERO;
+        let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
+        transmute(r)
+    }
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed integers in
@@ -32,11 +34,13 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
-    let a = a.as_i16x8();
-    let zero = i16x8::ZERO;
-    let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
-    transmute(r)
+pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i16x8();
+        let zero = i16x8::ZERO;
+        let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
+        transmute(r)
+    }
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed integers in
@@ -48,11 +52,13 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
-    let a = a.as_i32x4();
-    let zero = i32x4::ZERO;
-    let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
-    transmute(r)
+pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
+    unsafe {
+        let a = a.as_i32x4();
+        let zero = i32x4::ZERO;
+        let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
+        transmute(r)
+    }
 }
 
 /// Shuffles bytes from `a` according to the content of `b`.
@@ -85,8 +91,8 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pshufb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
-    transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
+pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
 }
 
 /// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
@@ -98,7 +104,7 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
@@ -122,29 +128,31 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128 shift + i } } - let r: i8x16 = simd_shuffle!( - b.as_i8x16(), - a.as_i8x16(), - [ - mask(IMM8 as u32, 0), - mask(IMM8 as u32, 1), - mask(IMM8 as u32, 2), - mask(IMM8 as u32, 3), - mask(IMM8 as u32, 4), - mask(IMM8 as u32, 5), - mask(IMM8 as u32, 6), - mask(IMM8 as u32, 7), - mask(IMM8 as u32, 8), - mask(IMM8 as u32, 9), - mask(IMM8 as u32, 10), - mask(IMM8 as u32, 11), - mask(IMM8 as u32, 12), - mask(IMM8 as u32, 13), - mask(IMM8 as u32, 14), - mask(IMM8 as u32, 15), - ], - ); - transmute(r) + unsafe { + let r: i8x16 = simd_shuffle!( + b.as_i8x16(), + a.as_i8x16(), + [ + mask(IMM8 as u32, 0), + mask(IMM8 as u32, 1), + mask(IMM8 as u32, 2), + mask(IMM8 as u32, 3), + mask(IMM8 as u32, 4), + mask(IMM8 as u32, 5), + mask(IMM8 as u32, 6), + mask(IMM8 as u32, 7), + mask(IMM8 as u32, 8), + mask(IMM8 as u32, 9), + mask(IMM8 as u32, 10), + mask(IMM8 as u32, 11), + mask(IMM8 as u32, 12), + mask(IMM8 as u32, 13), + mask(IMM8 as u32, 14), + mask(IMM8 as u32, 15), + ], + ); + transmute(r) + } } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -155,8 +163,8 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128 #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -168,8 +176,8 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) } } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -180,8 +188,8 @@ pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) } } /// Horizontally subtract the adjacent pairs of values contained in 2 @@ -192,8 +200,8 @@ pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) } } /// Horizontally subtract the adjacent pairs of values contained in 2 @@ -206,8 +214,8 @@ pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { - 
transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) } } /// Horizontally subtract the adjacent pairs of values contained in 2 @@ -218,8 +226,8 @@ pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -233,8 +241,8 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pmaddubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) +pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) } } /// Multiplies packed 16-bit signed integer values, truncate the 32-bit @@ -246,8 +254,8 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pmulhrsw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) } } /// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit @@ -260,8 +268,8 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(psignb128(a.as_i8x16(), b.as_i8x16())) +pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) } } /// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit @@ -274,8 +282,8 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(psignw128(a.as_i16x8(), b.as_i16x8())) +pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) } } /// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit @@ -288,8 +296,8 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(psignd128(a.as_i32x4(), b.as_i32x4())) +pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86_64/abm.rs b/crates/core_arch/src/x86_64/abm.rs index 764e903ed9..bf59cc4632 100644 --- 
a/crates/core_arch/src/x86_64/abm.rs +++ b/crates/core_arch/src/x86_64/abm.rs @@ -29,7 +29,7 @@ use stdarch_test::assert_instr; #[target_feature(enable = "lzcnt")] #[cfg_attr(test, assert_instr(lzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _lzcnt_u64(x: u64) -> u64 { +pub fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } @@ -40,7 +40,7 @@ pub unsafe fn _lzcnt_u64(x: u64) -> u64 { #[target_feature(enable = "popcnt")] #[cfg_attr(test, assert_instr(popcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _popcnt64(x: i64) -> i32 { +pub fn _popcnt64(x: i64) -> i32 { x.count_ones() as i32 } diff --git a/crates/core_arch/src/x86_64/avx.rs b/crates/core_arch/src/x86_64/avx.rs index 71b8290eb8..b494385e4a 100644 --- a/crates/core_arch/src/x86_64/avx.rs +++ b/crates/core_arch/src/x86_64/avx.rs @@ -24,9 +24,9 @@ use crate::{core_arch::x86::*, mem::transmute}; #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64) -> __m256i { +pub fn _mm256_insert_epi64(a: __m256i, i: i64) -> __m256i { static_assert_uimm_bits!(INDEX, 2); - transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) + unsafe { transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) } } /// Extracts a 64-bit integer from `a`, selected with `INDEX`. @@ -37,9 +37,9 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64) -> __m25 #[rustc_legacy_const_generics(1)] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi64(a: __m256i) -> i64 { +pub fn _mm256_extract_epi64(a: __m256i) -> i64 { static_assert_uimm_bits!(INDEX, 2); - simd_extract!(a.as_i64x4(), INDEX as u32) + unsafe { simd_extract!(a.as_i64x4(), INDEX as u32) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86_64/avx512bw.rs b/crates/core_arch/src/x86_64/avx512bw.rs index 798fc4adf6..43999b2a50 100644 --- a/crates/core_arch/src/x86_64/avx512bw.rs +++ b/crates/core_arch/src/x86_64/avx512bw.rs @@ -6,7 +6,7 @@ use crate::core_arch::x86::*; #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 { +pub fn _cvtmask64_u64(a: __mmask64) -> u64 { a } @@ -16,7 +16,7 @@ pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 { #[inline] #[target_feature(enable = "avx512bw")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -pub unsafe fn _cvtu64_mask64(a: u64) -> __mmask64 { +pub fn _cvtu64_mask64(a: u64) -> __mmask64 { a } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index c1c79585b0..946b900a2b 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -13,7 +13,7 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2si))] -pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 { +pub fn _mm_cvtsd_i64(a: __m128d) -> i64 { _mm_cvtsd_si64(a) } @@ -24,7 +24,7 @@ pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2si))] -pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 { +pub fn _mm_cvtss_i64(a: __m128) -> i64 { _mm_cvtss_si64(a) } @@ -35,8 +35,8 @@ pub unsafe fn _mm_cvtss_i64(a: __m128) -> 
i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2usi))] -pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 { - vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtss_u64(a: __m128) -> u64 { + unsafe { vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst. @@ -46,8 +46,8 @@ pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2usi))] -pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 { - vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsd_u64(a: __m128d) -> u64 { + unsafe { vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -57,9 +57,11 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2ss))] -pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { - let b = b as f32; - simd_insert!(a, 0, b) +pub fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } } /// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. @@ -69,9 +71,11 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2sd))] -pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { - let b = b as f64; - simd_insert!(a, 0, b) +pub fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } } /// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -81,9 +85,11 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2ss))] -pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { - let b = b as f32; - simd_insert!(a, 0, b) +pub fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { + unsafe { + let b = b as f32; + simd_insert!(a, 0, b) + } } /// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
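A minimal caller sketch (illustrative, not part of the patch) for the now-safe scalar conversions above such as `_mm_cvtsd_u64`. These are still gated on the unstable `stdarch_x86_avx512` library feature here, so the sketch assumes a nightly toolchain and an AVX-512F capable CPU; the helper name is hypothetical.

#![feature(stdarch_x86_avx512)]
use core::arch::x86_64::{__m128d, _mm_cvtsd_u64};

#[target_feature(enable = "avx512f")]
fn lower_to_u64(a: __m128d) -> u64 {
    // No `unsafe` block needed: avx512f is enabled for this whole function.
    _mm_cvtsd_u64(a)
}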
@@ -93,9 +99,11 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2sd))] -pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { - let b = b as f64; - simd_insert!(a, 0, b) +pub fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { + unsafe { + let b = b as f64; + simd_insert!(a, 0, b) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. @@ -105,8 +113,8 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2si))] -pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { - vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsd_i64(a: __m128d) -> i64 { + unsafe { vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. @@ -116,8 +124,8 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2usi))] -pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { - vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsd_u64(a: __m128d) -> u64 { + unsafe { vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. @@ -127,8 +135,8 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2si))] -pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { - vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttss_i64(a: __m128) -> i64 { + unsafe { vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. @@ -138,8 +146,8 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2usi))] -pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { - vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttss_u64(a: __m128) -> u64 { + unsafe { vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } } /// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
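A hypothetical sketch contrasting the truncating conversions above (`_mm_cvttsd_i64`) with the current-rounding-mode form (`_mm_cvtsd_i64`). It assumes the default MXCSR round-to-nearest mode and the same nightly `stdarch_x86_avx512` gate as the surrounding intrinsics.

#![feature(stdarch_x86_avx512)]
use core::arch::x86_64::{_mm_cvtsd_i64, _mm_cvttsd_i64, _mm_set_sd};

#[target_feature(enable = "avx512f")]
fn rounding_vs_truncation() -> (i64, i64) {
    let v = _mm_set_sd(2.75);
    // Round-to-nearest yields 3, truncation yields 2.
    (_mm_cvtsd_i64(v), _mm_cvttsd_i64(v))
}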
@@ -156,11 +164,13 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let r = vcvtsi2sd64(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let r = vcvtsi2sd64(a, b, ROUNDING); + transmute(r) + } } /// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. @@ -177,11 +187,13 @@ pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let r = vcvtsi2sd64(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let r = vcvtsi2sd64(a, b, ROUNDING); + transmute(r) + } } /// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -198,11 +210,13 @@ pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let r = vcvtsi2ss64(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss64(a, b, ROUNDING); + transmute(r) + } } /// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ @@ -219,11 +233,13 @@ pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - let r = vcvtusi2sd64(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __m128d { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + let r = vcvtusi2sd64(a, b, ROUNDING); + transmute(r) + } } /// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
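For the `_mm_cvt_round*` family above, the rounding mode is a const generic. A sketch of how a caller might pass it (hypothetical helper, same nightly gate assumed; `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` is one of the combinations accepted by `static_assert_rounding!`):

#![feature(stdarch_x86_avx512)]
use core::arch::x86_64::{
    __m128d, _mm_cvt_roundi64_sd, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEAREST_INT,
};

#[target_feature(enable = "avx512f")]
fn insert_rounded(a: __m128d, b: i64) -> __m128d {
    // Convert `b` with an explicit rounding mode and suppressed exceptions.
    _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
}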
@@ -240,11 +256,13 @@ pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let r = vcvtsi2ss64(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtsi2ss64(a, b, ROUNDING); + transmute(r) + } } /// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ @@ -261,11 +279,13 @@ pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - let r = vcvtusi2ss64(a, b, ROUNDING); - transmute(r) +pub fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m128 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + let r = vcvtusi2ss64(a, b, ROUNDING); + transmute(r) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ @@ -282,10 +302,12 @@ pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - vcvtsd2si64(a, ROUNDING) +pub fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si64(a, ROUNDING) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ @@ -302,10 +324,12 @@ pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - vcvtsd2si64(a, ROUNDING) +pub fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2si64(a, ROUNDING) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\ @@ -322,10 +346,12 @@ pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { - static_assert_rounding!(ROUNDING); - let a = a.as_f64x2(); - vcvtsd2usi64(a, ROUNDING) +pub fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f64x2(); + vcvtsd2usi64(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ @@ 
-342,10 +368,12 @@ pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d) -> u64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - vcvtss2si64(a, ROUNDING) +pub fn _mm_cvt_roundss_si64(a: __m128) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si64(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ @@ -362,10 +390,12 @@ pub unsafe fn _mm_cvt_roundss_si64(a: __m128) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - vcvtss2si64(a, ROUNDING) +pub fn _mm_cvt_roundss_i64(a: __m128) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2si64(a, ROUNDING) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\ @@ -382,10 +412,12 @@ pub unsafe fn _mm_cvt_roundss_i64(a: __m128) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { - static_assert_rounding!(ROUNDING); - let a = a.as_f32x4(); - vcvtss2usi64(a, ROUNDING) +pub fn _mm_cvt_roundss_u64(a: __m128) -> u64 { + unsafe { + static_assert_rounding!(ROUNDING); + let a = a.as_f32x4(); + vcvtss2usi64(a, ROUNDING) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -397,10 +429,12 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - vcvttsd2si64(a, SAE) +pub fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si64(a, SAE) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -412,10 +446,12 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - vcvttsd2si64(a, SAE) +pub fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2si64(a, SAE) + } } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ @@ -427,10 +463,12 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundsd_u64(a: 
__m128d) -> u64 { - static_assert_sae!(SAE); - let a = a.as_f64x2(); - vcvttsd2usi64(a, SAE) +pub fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f64x2(); + vcvttsd2usi64(a, SAE) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -442,10 +480,12 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - vcvttss2si64(a, SAE) +pub fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si64(a, SAE) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -457,10 +497,12 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - vcvttss2si64(a, SAE) +pub fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2si64(a, SAE) + } } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ @@ -472,10 +514,12 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] -pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { - static_assert_sae!(SAE); - let a = a.as_f32x4(); - vcvttss2usi64(a, SAE) +pub fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { + unsafe { + static_assert_sae!(SAE); + let a = a.as_f32x4(); + vcvttss2usi64(a, SAE) + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86_64/avx512fp16.rs b/crates/core_arch/src/x86_64/avx512fp16.rs index dbf88ab57f..69f1dcb5c7 100644 --- a/crates/core_arch/src/x86_64/avx512fp16.rs +++ b/crates/core_arch/src/x86_64/avx512fp16.rs @@ -11,8 +11,8 @@ use stdarch_test::assert_instr; #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsi2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h { - vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h { + unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) } } /// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the @@ -33,9 +33,11 @@ pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h { #[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundi64_sh(a: __m128h, b: i64) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtsi642sh(a, b, ROUNDING) +pub fn _mm_cvt_roundi64_sh(a: __m128h, b: i64) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsi642sh(a, b, ROUNDING) + } } /// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point 
element, store the @@ -47,8 +49,8 @@ pub unsafe fn _mm_cvt_roundi64_sh(a: __m128h, b: i64) -> __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtusi2sh))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h { - vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h { + unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) } } /// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the @@ -69,9 +71,11 @@ pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h { #[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundu64_sh(a: __m128h, b: u64) -> __m128h { - static_assert_rounding!(ROUNDING); - vcvtusi642sh(a, b, ROUNDING) +pub fn _mm_cvt_roundu64_sh(a: __m128h, b: u64) -> __m128h { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtusi642sh(a, b, ROUNDING) + } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store @@ -82,8 +86,8 @@ pub unsafe fn _mm_cvt_roundu64_sh(a: __m128h, b: u64) -> __ #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2si))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 { - vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsh_i64(a: __m128h) -> i64 { + unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store @@ -103,9 +107,11 @@ pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 { #[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsh_i64(a: __m128h) -> i64 { - static_assert_rounding!(ROUNDING); - vcvtsh2si64(a, ROUNDING) +pub fn _mm_cvt_roundsh_i64(a: __m128h) -> i64 { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsh2si64(a, ROUNDING) + } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store @@ -116,8 +122,8 @@ pub unsafe fn _mm_cvt_roundsh_i64(a: __m128h) -> i64 { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvtsh2usi))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 { - vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvtsh_u64(a: __m128h) -> u64 { + unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store @@ -137,9 +143,11 @@ pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 { #[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvt_roundsh_u64(a: __m128h) -> u64 { - static_assert_rounding!(ROUNDING); - vcvtsh2usi64(a, ROUNDING) +pub fn _mm_cvt_roundsh_u64(a: __m128h) -> u64 { + unsafe { + static_assert_rounding!(ROUNDING); + vcvtsh2usi64(a, ROUNDING) + } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation, @@ -150,8 +158,8 @@ pub unsafe fn _mm_cvt_roundsh_u64(a: __m128h) -> u64 { #[target_feature(enable = 
"avx512fp16")] #[cfg_attr(test, assert_instr(vcvttsh2si))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 { - vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsh_i64(a: __m128h) -> i64 { + unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation, @@ -165,9 +173,11 @@ pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 { #[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtt_roundsh_i64(a: __m128h) -> i64 { - static_assert_sae!(SAE); - vcvttsh2si64(a, SAE) +pub fn _mm_cvtt_roundsh_i64(a: __m128h) -> i64 { + unsafe { + static_assert_sae!(SAE); + vcvttsh2si64(a, SAE) + } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation, @@ -178,8 +188,8 @@ pub unsafe fn _mm_cvtt_roundsh_i64(a: __m128h) -> i64 { #[target_feature(enable = "avx512fp16")] #[cfg_attr(test, assert_instr(vcvttsh2usi))] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 { - vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) +pub fn _mm_cvttsh_u64(a: __m128h) -> u64 { + unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) } } /// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation, @@ -193,9 +203,11 @@ pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 { #[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub unsafe fn _mm_cvtt_roundsh_u64(a: __m128h) -> u64 { - static_assert_sae!(SAE); - vcvttsh2usi64(a, SAE) +pub fn _mm_cvtt_roundsh_u64(a: __m128h) -> u64 { + unsafe { + static_assert_sae!(SAE); + vcvttsh2usi64(a, SAE) + } } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86_64/bmi.rs b/crates/core_arch/src/x86_64/bmi.rs index 167ad26bfc..5d204d51ae 100644 --- a/crates/core_arch/src/x86_64/bmi.rs +++ b/crates/core_arch/src/x86_64/bmi.rs @@ -21,7 +21,7 @@ use stdarch_test::assert_instr; #[cfg_attr(test, assert_instr(bextr))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { +pub fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64) } @@ -37,8 +37,8 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { #[cfg_attr(test, assert_instr(bextr))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 { - x86_bmi_bextr_64(a, control) +pub fn _bextr2_u64(a: u64, control: u64) -> u64 { + unsafe { x86_bmi_bextr_64(a, control) } } /// Bitwise logical `AND` of inverted `a` with `b`. 
@@ -48,7 +48,7 @@ pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(andn))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 { +pub fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } @@ -60,7 +60,7 @@ pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 { #[cfg_attr(test, assert_instr(blsi))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsi_u64(x: u64) -> u64 { +pub fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } @@ -72,7 +72,7 @@ pub unsafe fn _blsi_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blsmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsmsk_u64(x: u64) -> u64 { +pub fn _blsmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_sub(1_u64)) } @@ -86,7 +86,7 @@ pub unsafe fn _blsmsk_u64(x: u64) -> u64 { #[cfg_attr(test, assert_instr(blsr))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _blsr_u64(x: u64) -> u64 { +pub fn _blsr_u64(x: u64) -> u64 { x & (x.wrapping_sub(1)) } @@ -99,7 +99,7 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _tzcnt_u64(x: u64) -> u64 { +pub fn _tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } @@ -112,7 +112,7 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 { #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 { +pub fn _mm_tzcnt_64(x: u64) -> i64 { x.trailing_zeros() as i64 } diff --git a/crates/core_arch/src/x86_64/bmi2.rs b/crates/core_arch/src/x86_64/bmi2.rs index 5ca4c6bde4..ea9daf8857 100644 --- a/crates/core_arch/src/x86_64/bmi2.rs +++ b/crates/core_arch/src/x86_64/bmi2.rs @@ -24,7 +24,7 @@ use stdarch_test::assert_instr; #[target_feature(enable = "bmi2")] #[cfg(not(target_arch = "x86"))] // calls an intrinsic #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { +pub fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { let result: u128 = (a as u128) * (b as u128); *hi = (result >> 64) as u64; result as u64 @@ -38,8 +38,8 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { #[cfg_attr(test, assert_instr(bzhi))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 { - x86_bmi2_bzhi_64(a, index as u64) +pub fn _bzhi_u64(a: u64, index: u32) -> u64 { + unsafe { x86_bmi2_bzhi_64(a, index as u64) } } /// Scatter contiguous low order bits of `a` to the result at the positions @@ -51,8 +51,8 @@ pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 { #[cfg_attr(test, assert_instr(pdep))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 { - x86_bmi2_pdep_64(a, mask) +pub fn _pdep_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pdep_64(a, mask) } } /// Gathers the bits of `x` specified by the `mask` into the contiguous low @@ -64,8 +64,8 @@ pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 { #[cfg_attr(test, assert_instr(pext))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] 
-pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 { - x86_bmi2_pext_64(a, mask) +pub fn _pext_u64(a: u64, mask: u64) -> u64 { + unsafe { x86_bmi2_pext_64(a, mask) } } unsafe extern "C" { diff --git a/crates/core_arch/src/x86_64/sse.rs b/crates/core_arch/src/x86_64/sse.rs index c5f70ccb39..863c3cd2e7 100644 --- a/crates/core_arch/src/x86_64/sse.rs +++ b/crates/core_arch/src/x86_64/sse.rs @@ -30,8 +30,8 @@ unsafe extern "C" { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { - cvtss2si64(a) +pub fn _mm_cvtss_si64(a: __m128) -> i64 { + unsafe { cvtss2si64(a) } } /// Converts the lowest 32 bit float in the input vector to a 64 bit integer @@ -49,8 +49,8 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { - cvttss2si64(a) +pub fn _mm_cvttss_si64(a: __m128) -> i64 { + unsafe { cvttss2si64(a) } } /// Converts a 64 bit integer to a 32 bit float. The result vector is the input @@ -64,8 +64,8 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { - cvtsi642ss(a, b) +pub fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { + unsafe { cvtsi642ss(a, b) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86_64/sse2.rs b/crates/core_arch/src/x86_64/sse2.rs index b1cd12506a..760661f0d2 100644 --- a/crates/core_arch/src/x86_64/sse2.rs +++ b/crates/core_arch/src/x86_64/sse2.rs @@ -21,8 +21,8 @@ unsafe extern "C" { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 { - cvtsd2si64(a) +pub fn _mm_cvtsd_si64(a: __m128d) -> i64 { + unsafe { cvtsd2si64(a) } } /// Alias for `_mm_cvtsd_si64` @@ -32,7 +32,7 @@ pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { +pub fn _mm_cvtsd_si64x(a: __m128d) -> i64 { _mm_cvtsd_si64(a) } @@ -44,8 +44,8 @@ pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 { - cvttsd2si64(a) +pub fn _mm_cvttsd_si64(a: __m128d) -> i64 { + unsafe { cvttsd2si64(a) } } /// Alias for `_mm_cvttsd_si64` @@ -55,7 +55,7 @@ pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { +pub fn _mm_cvttsd_si64x(a: __m128d) -> i64 { _mm_cvttsd_si64(a) } @@ -94,7 +94,7 @@ pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { +pub fn _mm_cvtsi64_si128(a: i64) -> __m128i { _mm_set_epi64x(0, a) } @@ -106,7 +106,7 @@ pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { 
#[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { +pub fn _mm_cvtsi64x_si128(a: i64) -> __m128i { _mm_cvtsi64_si128(a) } @@ -117,8 +117,8 @@ pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { - simd_extract!(a.as_i64x2(), 0) +pub fn _mm_cvtsi128_si64(a: __m128i) -> i64 { + unsafe { simd_extract!(a.as_i64x2(), 0) } } /// Returns the lowest element of `a`. @@ -128,7 +128,7 @@ pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { +pub fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { _mm_cvtsi128_si64(a) } @@ -140,8 +140,8 @@ pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { - simd_insert!(a, 0, b as f64) +pub fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { + unsafe { simd_insert!(a, 0, b as f64) } } /// Returns `a` with its lower element replaced by `b` after converting it to @@ -152,7 +152,7 @@ pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { +pub fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { _mm_cvtsi64_sd(a, b) } diff --git a/crates/core_arch/src/x86_64/sse41.rs b/crates/core_arch/src/x86_64/sse41.rs index 5b6c72e3fc..e57ffac1ca 100644 --- a/crates/core_arch/src/x86_64/sse41.rs +++ b/crates/core_arch/src/x86_64/sse41.rs @@ -13,9 +13,9 @@ use stdarch_test::assert_instr; #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(pextrq, IMM1 = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_extract_epi64(a: __m128i) -> i64 { +pub fn _mm_extract_epi64(a: __m128i) -> i64 { static_assert_uimm_bits!(IMM1, 1); - simd_extract!(a.as_i64x2(), IMM1 as u32) + unsafe { simd_extract!(a.as_i64x2(), IMM1 as u32) } } /// Returns a copy of `a` with the 64-bit integer from `i` inserted at a @@ -27,9 +27,9 @@ pub unsafe fn _mm_extract_epi64(a: __m128i) -> i64 { #[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64) -> __m128i { +pub fn _mm_insert_epi64(a: __m128i, i: i64) -> __m128i { static_assert_uimm_bits!(IMM1, 1); - transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i)) + unsafe { transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i)) } } #[cfg(test)] diff --git a/crates/core_arch/src/x86_64/sse42.rs b/crates/core_arch/src/x86_64/sse42.rs index 6c3111b719..64a23b2b19 100644 --- a/crates/core_arch/src/x86_64/sse42.rs +++ b/crates/core_arch/src/x86_64/sse42.rs @@ -17,8 +17,8 @@ unsafe extern "C" { #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_crc32_u64(crc: u64, 
v: u64) -> u64 { - crc32_64_64(crc, v) +pub fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { + unsafe { crc32_64_64(crc, v) } } #[cfg(test)]
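An end-to-end usage sketch (hypothetical helper names) tying the change together with the last hunk: runtime feature detection plus the now-safe `_mm_crc32_u64`. Calling a `#[target_feature]` function from ordinary code still requires `unsafe`; the intrinsic call inside it no longer does.

use core::arch::x86_64::_mm_crc32_u64;

#[target_feature(enable = "sse4.2")]
fn crc32_words(mut crc: u64, words: &[u64]) -> u64 {
    for &w in words {
        crc = _mm_crc32_u64(crc, w); // safe call: sse4.2 is enabled here
    }
    crc
}

fn checksum(words: &[u64]) -> Option<u64> {
    if std::arch::is_x86_feature_detected!("sse4.2") {
        // SAFETY: sse4.2 availability was verified at runtime.
        Some(unsafe { crc32_words(0, words) })
    } else {
        None
    }
}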