Skip to content

Commit 9faced9

Browse files
authored
added f32 and f64 unaligned stores and loads from avx512f set (rust-lang#873)
1 parent a371069 commit 9faced9

File tree

5 files changed

+168
-56
lines changed

5 files changed

+168
-56
lines changed

crates/core_arch/src/x86/avx.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -2956,8 +2956,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
29562956
// This intrinsic has no corresponding instruction.
29572957
#[stable(feature = "simd_x86", since = "1.27.0")]
29582958
pub unsafe fn _mm256_undefined_ps() -> __m256 {
2959-
// FIXME: this function should return MaybeUninit<__m256>
2960-
mem::MaybeUninit::<__m256>::uninit().assume_init()
2959+
_mm256_set1_ps(0.0)
29612960
}
29622961

29632962
/// Returns vector of type `__m256d` with undefined elements.
@@ -2968,8 +2967,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
29682967
// This intrinsic has no corresponding instruction.
29692968
#[stable(feature = "simd_x86", since = "1.27.0")]
29702969
pub unsafe fn _mm256_undefined_pd() -> __m256d {
2971-
// FIXME: this function should return MaybeUninit<__m256d>
2972-
mem::MaybeUninit::<__m256d>::uninit().assume_init()
2970+
_mm256_set1_pd(0.0)
29732971
}
29742972

29752973
/// Returns vector of type __m256i with undefined elements.

crates/core_arch/src/x86/avx512f.rs

+159
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::{
22
core_arch::{simd::*, simd_llvm::*, x86::*},
33
mem::{self, transmute},
4+
ptr,
45
};
56

67
#[cfg(test)]
@@ -1633,6 +1634,113 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
16331634
transmute(r)
16341635
}
16351636

1637+
/// Returns vector of type `__m512d` with undefined elements.
1638+
///
1639+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
1640+
#[inline]
1641+
#[target_feature(enable = "avx512f")]
1642+
// This intrinsic has no corresponding instruction.
1643+
pub unsafe fn _mm512_undefined_pd() -> __m512d {
1644+
_mm512_set1_pd(0.0)
1645+
}
1646+
1647+
/// Returns vector of type `__m512` with undefined elements.
1648+
///
1649+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
1650+
#[inline]
1651+
#[target_feature(enable = "avx512f")]
1652+
// This intrinsic has no corresponding instruction.
1653+
pub unsafe fn _mm512_undefined_ps() -> __m512 {
1654+
_mm512_set1_ps(0.0)
1655+
}
1656+
1657+
/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
1658+
/// floating-point elements) from memory into result.
1659+
/// `mem_addr` does not need to be aligned on any particular boundary.
1660+
///
1661+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
1662+
#[inline]
1663+
#[target_feature(enable = "avx512f")]
1664+
#[cfg_attr(test, assert_instr(vmovups))]
1665+
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
1666+
ptr::read_unaligned(mem_addr as *const __m512d)
1667+
}
1668+
1669+
/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
1670+
/// floating-point elements) from `a` into memory.
1671+
/// `mem_addr` does not need to be aligned on any particular boundary.
1672+
///
1673+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
1674+
#[inline]
1675+
#[target_feature(enable = "avx512f")]
1676+
#[cfg_attr(test, assert_instr(vmovups))]
1677+
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
1678+
ptr::write_unaligned(mem_addr as *mut __m512d, a);
1679+
}
1680+
1681+
/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
1682+
/// floating-point elements) from memory into result.
1683+
/// `mem_addr` does not need to be aligned on any particular boundary.
1684+
///
1685+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
1686+
#[inline]
1687+
#[target_feature(enable = "avx512f")]
1688+
#[cfg_attr(test, assert_instr(vmovups))]
1689+
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
1690+
ptr::read_unaligned(mem_addr as *const __m512)
1691+
}
1692+
1693+
/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
1694+
/// floating-point elements) from `a` into memory.
1695+
/// `mem_addr` does not need to be aligned on any particular boundary.
1696+
///
1697+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
1698+
#[inline]
1699+
#[target_feature(enable = "avx512f")]
1700+
#[cfg_attr(test, assert_instr(vmovups))]
1701+
#[stable(feature = "simd_x86", since = "1.27.0")]
1702+
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
1703+
ptr::write_unaligned(mem_addr as *mut __m512, a);
1704+
}
1705+
1706+
/// Sets packed 64-bit integers in `dst` with the supplied values in
1707+
/// reverse order.
1708+
///
1709+
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
1710+
#[inline]
1711+
#[target_feature(enable = "avx512f")]
1712+
pub unsafe fn _mm512_setr_pd(
1713+
e0: f64,
1714+
e1: f64,
1715+
e2: f64,
1716+
e3: f64,
1717+
e4: f64,
1718+
e5: f64,
1719+
e6: f64,
1720+
e7: f64,
1721+
) -> __m512d {
1722+
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
1723+
transmute(r)
1724+
}
1725+
1726+
/// Sets packed 64-bit integers in `dst` with the supplied values.
1727+
///
1728+
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
1729+
#[inline]
1730+
#[target_feature(enable = "avx512f")]
1731+
pub unsafe fn _mm512_set_pd(
1732+
e0: f64,
1733+
e1: f64,
1734+
e2: f64,
1735+
e3: f64,
1736+
e4: f64,
1737+
e5: f64,
1738+
e6: f64,
1739+
e7: f64,
1740+
) -> __m512d {
1741+
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
1742+
}
1743+
16361744
/// Equal
16371745
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
16381746
/// Less-than
@@ -1702,6 +1810,7 @@ mod tests {
17021810
use stdarch_test::simd_test;
17031811

17041812
use crate::core_arch::x86::*;
1813+
use crate::hint::black_box;
17051814

17061815
#[simd_test(enable = "avx512f")]
17071816
unsafe fn test_mm512_abs_epi32() {
@@ -2326,4 +2435,54 @@ mod tests {
23262435
unsafe fn test_mm512_setzero_ps() {
23272436
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
23282437
}
2438+
2439+
#[simd_test(enable = "avx512f")]
2440+
unsafe fn test_mm512_loadu_pd() {
2441+
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
2442+
let p = a.as_ptr();
2443+
let r = _mm512_loadu_pd(black_box(p));
2444+
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
2445+
assert_eq_m512d(r, e);
2446+
}
2447+
2448+
#[simd_test(enable = "avx512f")]
2449+
unsafe fn test_mm512_storeu_pd() {
2450+
let a = _mm512_set1_pd(9.);
2451+
let mut r = _mm512_undefined_pd();
2452+
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
2453+
assert_eq_m512d(r, a);
2454+
}
2455+
2456+
#[simd_test(enable = "avx512f")]
2457+
unsafe fn test_mm512_loadu_ps() {
2458+
let a = &[
2459+
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
2460+
];
2461+
let p = a.as_ptr();
2462+
let r = _mm512_loadu_ps(black_box(p));
2463+
let e = _mm512_setr_ps(
2464+
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
2465+
);
2466+
assert_eq_m512(r, e);
2467+
}
2468+
2469+
#[simd_test(enable = "avx512f")]
2470+
unsafe fn test_mm512_storeu_ps() {
2471+
let a = _mm512_set1_ps(9.);
2472+
let mut r = _mm512_undefined_ps();
2473+
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
2474+
assert_eq_m512(r, a);
2475+
}
2476+
2477+
#[simd_test(enable = "avx512f")]
2478+
unsafe fn test_mm512_setr_pd() {
2479+
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
2480+
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
2481+
}
2482+
2483+
#[simd_test(enable = "avx512f")]
2484+
unsafe fn test_mm512_set_pd() {
2485+
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
2486+
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
2487+
}
23292488
}

crates/core_arch/src/x86/sse.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1865,8 +1865,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
18651865
#[target_feature(enable = "sse")]
18661866
#[stable(feature = "simd_x86", since = "1.27.0")]
18671867
pub unsafe fn _mm_undefined_ps() -> __m128 {
1868-
// FIXME: this function should return MaybeUninit<__m128>
1869-
mem::MaybeUninit::<__m128>::uninit().assume_init()
1868+
_mm_set1_ps(0.0)
18701869
}
18711870

18721871
/// Transpose the 4x4 matrix formed by 4 rows of __m128 in place.

crates/core_arch/src/x86_64/avx512f.rs

-50
Original file line numberDiff line numberDiff line change
@@ -3,44 +3,6 @@ use crate::{
33
mem::transmute,
44
};
55

6-
/// Sets packed 64-bit integers in `dst` with the supplied values.
7-
///
8-
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
9-
#[inline]
10-
#[target_feature(enable = "avx512f")]
11-
pub unsafe fn _mm512_set_pd(
12-
e0: f64,
13-
e1: f64,
14-
e2: f64,
15-
e3: f64,
16-
e4: f64,
17-
e5: f64,
18-
e6: f64,
19-
e7: f64,
20-
) -> __m512d {
21-
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
22-
}
23-
24-
/// Sets packed 64-bit integers in `dst` with the supplied values in
25-
/// reverse order.
26-
///
27-
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
28-
#[inline]
29-
#[target_feature(enable = "avx512f")]
30-
pub unsafe fn _mm512_setr_pd(
31-
e0: f64,
32-
e1: f64,
33-
e2: f64,
34-
e3: f64,
35-
e4: f64,
36-
e5: f64,
37-
e6: f64,
38-
e7: f64,
39-
) -> __m512d {
40-
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
41-
transmute(r)
42-
}
43-
446
/// Sets packed 64-bit integers in `dst` with the supplied values.
457
///
468
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
@@ -311,18 +273,6 @@ mod tests {
311273
assert_eq!(r, 0b01001010);
312274
}
313275

314-
#[simd_test(enable = "avx512f")]
315-
unsafe fn test_mm512_set_pd() {
316-
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
317-
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
318-
}
319-
320-
#[simd_test(enable = "avx512f")]
321-
unsafe fn test_mm512_setr_pd() {
322-
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
323-
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
324-
}
325-
326276
#[simd_test(enable = "avx512f")]
327277
unsafe fn test_mm512_set_epi64() {
328278
let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);

crates/stdarch-verify/tests/x86-intel.rs

+6
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ fn verify_all_signatures() {
282282
"_mm_tzcnt_64",
283283
"_fxsave64",
284284
"_fxrstor64",
285+
"_mm512_undefined_ps",
286+
"_mm512_undefined_pd",
285287
];
286288
if !skip.contains(&rust.name) {
287289
println!(
@@ -625,6 +627,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
625627

626628
(&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
627629
(&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
630+
(&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
631+
(&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
628632
(&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
629633
(&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
630634
(&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
@@ -646,6 +650,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
646650

647651
(&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
648652
(&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
653+
(&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
654+
(&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
649655
(&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
650656
(&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
651657
(&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}

0 commit comments

Comments
 (0)