diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index 68f106f318..623aadebee 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -2956,8 +2956,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_undefined_ps() -> __m256 {
-    // FIXME: this function should return MaybeUninit<__m256>
-    mem::MaybeUninit::<__m256>::uninit().assume_init()
+    _mm256_set1_ps(0.0)
 }
 
 /// Returns vector of type `__m256d` with undefined elements.
@@ -2968,8 +2967,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_undefined_pd() -> __m256d {
-    // FIXME: this function should return MaybeUninit<__m256d>
-    mem::MaybeUninit::<__m256d>::uninit().assume_init()
+    _mm256_set1_pd(0.0)
 }
 
 /// Returns vector of type __m256i with undefined elements.
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index a9ba0ef3cd..56c8172f14 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -1,6 +1,7 @@
 use crate::{
     core_arch::{simd::*, simd_llvm::*, x86::*},
     mem::{self, transmute},
+    ptr,
 };
 
 #[cfg(test)]
@@ -1633,6 +1634,112 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
     transmute(r)
 }
 
+/// Returns vector of type `__m512d` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_pd() -> __m512d {
+    _mm512_set1_pd(0.0)
+}
+
+/// Returns vector of type `__m512` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_ps() -> __m512 {
+    _mm512_set1_ps(0.0)
+}
+
+/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+    ptr::read_unaligned(mem_addr as *const __m512d)
+}
+
+/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+    ptr::write_unaligned(mem_addr as *mut __m512d, a);
+}
+
+/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+    ptr::read_unaligned(mem_addr as *const __m512)
+}
+
+/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+    ptr::write_unaligned(mem_addr as *mut __m512, a);
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst`
+/// with the supplied values in reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_setr_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+}
+
 /// Equal
 pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
 /// Less-than
@@ -1702,6 +1809,7 @@ mod tests {
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
+    use crate::hint::black_box;
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_abs_epi32() {
@@ -2326,4 +2434,54 @@
     unsafe fn test_mm512_setzero_ps() {
         assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
     }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_loadu_pd() {
+        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
+        let p = a.as_ptr();
+        let r = _mm512_loadu_pd(black_box(p));
+        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_storeu_pd() {
+        let a = _mm512_set1_pd(9.);
+        let mut r = _mm512_undefined_pd();
+        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
+        assert_eq_m512d(r, a);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_loadu_ps() {
+        let a = &[
+            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
+        ];
+        let p = a.as_ptr();
+        let r = _mm512_loadu_ps(black_box(p));
+        let e = _mm512_setr_ps(
+            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
+        );
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_storeu_ps() {
+        let a = _mm512_set1_ps(9.);
+        let mut r = _mm512_undefined_ps();
+        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
+        assert_eq_m512(r, a);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_setr_pd() {
+        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_set_pd() {
+        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
+    }
 }
diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs
index 1b2b8349b1..3de09ca964 100644
--- a/crates/core_arch/src/x86/sse.rs
+++ b/crates/core_arch/src/x86/sse.rs
@@ -1865,8 +1865,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
 #[target_feature(enable = "sse")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_undefined_ps() -> __m128 {
-    // FIXME: this function should return MaybeUninit<__m128>
-    mem::MaybeUninit::<__m128>::uninit().assume_init()
+    _mm_set1_ps(0.0)
 }
 
 /// Transpose the 4x4 matrix formed by 4 rows of __m128 in place.
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index f832f2eaa5..0b5e333517 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -3,44 +3,6 @@ use crate::{
     mem::transmute,
 };
 
-/// Sets packed 64-bit integers in `dst` with the supplied values.
-///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
-#[inline]
-#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_pd(
-    e0: f64,
-    e1: f64,
-    e2: f64,
-    e3: f64,
-    e4: f64,
-    e5: f64,
-    e6: f64,
-    e7: f64,
-) -> __m512d {
-    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
-}
-
-/// Sets packed 64-bit integers in `dst` with the supplied values in
-/// reverse order.
-///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
-#[inline]
-#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_pd(
-    e0: f64,
-    e1: f64,
-    e2: f64,
-    e3: f64,
-    e4: f64,
-    e5: f64,
-    e6: f64,
-    e7: f64,
-) -> __m512d {
-    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
-    transmute(r)
-}
-
 /// Sets packed 64-bit integers in `dst` with the supplied values.
 ///
 /// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
@@ -311,18 +273,6 @@ mod tests {
         assert_eq!(r, 0b01001010);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_pd() {
-        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
-        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
-    }
-
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr_pd() {
-        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
-        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
-    }
-
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_set_epi64() {
         let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index 5adf5e6ef5..f79483fc08 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -282,6 +282,8 @@ fn verify_all_signatures() {
                 "_mm_tzcnt_64",
                 "_fxsave64",
                 "_fxrstor64",
+                "_mm512_undefined_ps",
+                "_mm512_undefined_pd",
             ];
             if !skip.contains(&rust.name) {
                 println!(
@@ -625,6 +627,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
 
         (&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
         (&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
+        (&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
+        (&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
         (&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
         (&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
         (&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
@@ -646,6 +650,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
 
         (&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
         (&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
+        (&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
+        (&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
        (&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
         (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
         (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}