|
1 | 1 | use crate::{
|
2 | 2 | core_arch::{simd::*, simd_llvm::*, x86::*},
|
3 | 3 | mem::{self, transmute},
|
| 4 | + ptr, |
4 | 5 | };
|
5 | 6 |
|
6 | 7 | #[cfg(test)]
|
@@ -1633,6 +1634,113 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
|
1633 | 1634 | transmute(r)
|
1634 | 1635 | }
|
1635 | 1636 |
|
| 1637 | +/// Returns a vector of type `__m512d` with undefined elements; do not rely on its contents. |
| 1638 | +/// |
| 1639 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd) |
| 1640 | +#[inline] |
| 1641 | +#[target_feature(enable = "avx512f")] |
| 1642 | +// This intrinsic has no corresponding instruction; it is currently implemented as `_mm512_set1_pd(0.0)`. |
| 1643 | +pub unsafe fn _mm512_undefined_pd() -> __m512d { |
| 1644 | + _mm512_set1_pd(0.0) |
| 1645 | +} |
| 1646 | + |
| 1647 | +/// Returns a vector of type `__m512` with undefined elements; do not rely on its contents. |
| 1648 | +/// |
| 1649 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps) |
| 1650 | +#[inline] |
| 1651 | +#[target_feature(enable = "avx512f")] |
| 1652 | +// This intrinsic has no corresponding instruction; it is currently implemented as `_mm512_set1_ps(0.0)`. |
| 1653 | +pub unsafe fn _mm512_undefined_ps() -> __m512 { |
| 1654 | + _mm512_set1_ps(0.0) |
| 1655 | +} |
| 1656 | + |
| 1657 | +/// Loads 512 bits (composed of 8 packed double-precision (64-bit) |
| 1658 | +/// floating-point elements) from memory and returns them as a `__m512d`. |
| 1659 | +/// `mem_addr` does not need to be aligned on any particular boundary; the read goes through `ptr::read_unaligned`. |
| 1660 | +/// |
| 1661 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd) |
| 1662 | +#[inline] |
| 1663 | +#[target_feature(enable = "avx512f")] |
| 1664 | +#[cfg_attr(test, assert_instr(vmovups))] |
| 1665 | +pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d { |
| 1666 | + ptr::read_unaligned(mem_addr as *const __m512d) |
| 1667 | +} |
| 1668 | + |
| 1669 | +/// Stores 512 bits (composed of 8 packed double-precision (64-bit) |
| 1670 | +/// floating-point elements) from `a` into memory at `mem_addr`. |
| 1671 | +/// `mem_addr` does not need to be aligned on any particular boundary; the write goes through `ptr::write_unaligned`. |
| 1672 | +/// |
| 1673 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd) |
| 1674 | +#[inline] |
| 1675 | +#[target_feature(enable = "avx512f")] |
| 1676 | +#[cfg_attr(test, assert_instr(vmovups))] |
| 1677 | +pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) { |
| 1678 | + ptr::write_unaligned(mem_addr as *mut __m512d, a); |
| 1679 | +} |
| 1680 | + |
| 1681 | +/// Loads 512 bits (composed of 16 packed single-precision (32-bit) |
| 1682 | +/// floating-point elements) from memory and returns them as a `__m512`. |
| 1683 | +/// `mem_addr` does not need to be aligned on any particular boundary; the read goes through `ptr::read_unaligned`. |
| 1684 | +/// |
| 1685 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps) |
| 1686 | +#[inline] |
| 1687 | +#[target_feature(enable = "avx512f")] |
| 1688 | +#[cfg_attr(test, assert_instr(vmovups))] |
| 1689 | +pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 { |
| 1690 | + ptr::read_unaligned(mem_addr as *const __m512) |
| 1691 | +} |
| 1692 | + |
| 1693 | +/// Stores 512 bits (composed of 16 packed single-precision (32-bit) |
| 1694 | +/// floating-point elements) from `a` into memory at `mem_addr`. |
| 1695 | +/// `mem_addr` does not need to be aligned on any particular boundary; the write goes through `ptr::write_unaligned`. |
| 1696 | +/// |
| 1697 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps) |
| 1698 | +#[inline] |
| 1699 | +#[target_feature(enable = "avx512f")] |
| 1700 | +#[cfg_attr(test, assert_instr(vmovups))] |
| 1702 | +pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) { |
| 1703 | + ptr::write_unaligned(mem_addr as *mut __m512, a); |
| 1704 | +} |
| 1705 | + |
| 1706 | +/// Sets packed double-precision (64-bit) floating-point elements in `dst` with |
| 1707 | +/// the supplied values in reverse order (relative to `_mm512_set_pd`). |
| 1708 | +/// |
| 1709 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_pd) |
| 1710 | +#[inline] |
| 1711 | +#[target_feature(enable = "avx512f")] |
| 1712 | +pub unsafe fn _mm512_setr_pd( |
| 1713 | + e0: f64, |
| 1714 | + e1: f64, |
| 1715 | + e2: f64, |
| 1716 | + e3: f64, |
| 1717 | + e4: f64, |
| 1718 | + e5: f64, |
| 1719 | + e6: f64, |
| 1720 | + e7: f64, |
| 1721 | +) -> __m512d { |
| 1722 | + let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); |
| 1723 | + transmute(r) |
| 1724 | +} |
| 1725 | + |
| 1726 | +/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values (forwarded to `_mm512_setr_pd` in reversed order). |
| 1727 | +/// |
| 1728 | +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_pd) |
| 1729 | +#[inline] |
| 1730 | +#[target_feature(enable = "avx512f")] |
| 1731 | +pub unsafe fn _mm512_set_pd( |
| 1732 | + e0: f64, |
| 1733 | + e1: f64, |
| 1734 | + e2: f64, |
| 1735 | + e3: f64, |
| 1736 | + e4: f64, |
| 1737 | + e5: f64, |
| 1738 | + e6: f64, |
| 1739 | + e7: f64, |
| 1740 | +) -> __m512d { |
| 1741 | + _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) |
| 1742 | +} |
| 1743 | + |
1636 | 1744 | /// Equal
|
1637 | 1745 | pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
|
1638 | 1746 | /// Less-than
|
@@ -1702,6 +1810,7 @@ mod tests {
|
1702 | 1810 | use stdarch_test::simd_test;
|
1703 | 1811 |
|
1704 | 1812 | use crate::core_arch::x86::*;
|
| 1813 | + use crate::hint::black_box; |
1705 | 1814 |
|
1706 | 1815 | #[simd_test(enable = "avx512f")]
|
1707 | 1816 | unsafe fn test_mm512_abs_epi32() {
|
@@ -2326,4 +2435,54 @@ mod tests {
|
2326 | 2435 | unsafe fn test_mm512_setzero_ps() {
|
2327 | 2436 | assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
|
2328 | 2437 | }
|
| 2438 | + |
| 2439 | + #[simd_test(enable = "avx512f")] |
| 2440 | + unsafe fn test_mm512_loadu_pd() { |
| 2441 | + let src = &[4., 3., 2., 5., 8., 9., 64., 50.]; |
| 2442 | + let src_ptr = src.as_ptr(); |
| 2443 | + let loaded = _mm512_loadu_pd(black_box(src_ptr)); |
| 2444 | + let expected = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.); |
| 2445 | + assert_eq_m512d(loaded, expected); |
| 2446 | + } |
| 2447 | + |
| 2448 | + #[simd_test(enable = "avx512f")] |
| 2449 | + unsafe fn test_mm512_storeu_pd() { |
| 2450 | + let value = _mm512_set1_pd(9.); |
| 2451 | + let mut dst = _mm512_undefined_pd(); |
| 2452 | + _mm512_storeu_pd(&mut dst as *mut _ as *mut f64, value); |
| 2453 | + assert_eq_m512d(dst, value); |
| 2454 | + } |
| 2455 | + |
| 2456 | + #[simd_test(enable = "avx512f")] |
| 2457 | + unsafe fn test_mm512_loadu_ps() { |
| 2458 | + let src = &[ |
| 2459 | + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., |
| 2460 | + ]; |
| 2461 | + let src_ptr = src.as_ptr(); |
| 2462 | + let loaded = _mm512_loadu_ps(black_box(src_ptr)); |
| 2463 | + let expected = _mm512_setr_ps( |
| 2464 | + 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50., |
| 2465 | + ); |
| 2466 | + assert_eq_m512(loaded, expected); |
| 2467 | + } |
| 2468 | + |
| 2469 | + #[simd_test(enable = "avx512f")] |
| 2470 | + unsafe fn test_mm512_storeu_ps() { |
| 2471 | + let value = _mm512_set1_ps(9.); |
| 2472 | + let mut dst = _mm512_undefined_ps(); |
| 2473 | + _mm512_storeu_ps(&mut dst as *mut _ as *mut f32, value); |
| 2474 | + assert_eq_m512(dst, value); |
| 2475 | + } |
| 2476 | + |
| 2477 | + #[simd_test(enable = "avx512f")] |
| 2478 | + unsafe fn test_mm512_setr_pd() { |
| 2479 | + let via_set = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); |
| 2480 | + assert_eq_m512d(via_set, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.)); |
| 2481 | + } |
| 2482 | + |
| 2483 | + #[simd_test(enable = "avx512f")] |
| 2484 | + unsafe fn test_mm512_set_pd() { |
| 2485 | + let via_setr = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); |
| 2486 | + assert_eq_m512d(via_setr, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); |
| 2487 | + } |
2329 | 2488 | }
|
0 commit comments