|
15 | 15 |
|
16 | 16 | use crate::{
|
17 | 17 | core_arch::{simd::*, simd_llvm::*},
|
| 18 | + mem, |
18 | 19 | mem::transmute,
|
19 | 20 | };
|
20 | 21 |
|
@@ -534,6 +535,60 @@ mod sealed {
|
534 | 535 |
|
535 | 536 | impl_vec_lde! { vec_lde_f32 lvewx f32 }
|
536 | 537 |
|
| 538 | + pub trait VectorXl { |
| 539 | + type Result; |
| 540 | + unsafe fn vec_xl(self, a: isize) -> Self::Result; |
| 541 | + } |
| 542 | + |
| 543 | + macro_rules! impl_vec_xl { |
| 544 | + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { |
| 545 | + #[inline] |
| 546 | + #[target_feature(enable = "altivec")] |
| 547 | + #[cfg_attr( |
| 548 | + all(test, not(target_feature = "power9-altivec")), |
| 549 | + assert_instr($notpwr9) |
| 550 | + )] |
| 551 | + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] |
| 552 | + pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) { |
| 553 | + let addr = (b as *const u8).offset(a); |
| 554 | + |
| 555 | + // Workaround for ptr::copy_nonoverlapping not being inlined |
| 556 | + extern "rust-intrinsic" { |
| 557 | + #[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")] |
| 558 | + #[rustc_nounwind] |
| 559 | + pub fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize); |
| 560 | + } |
| 561 | + |
| 562 | + let mut r = mem::MaybeUninit::uninit(); |
| 563 | + |
| 564 | + copy_nonoverlapping( |
| 565 | + addr, |
| 566 | + r.as_mut_ptr() as *mut u8, |
| 567 | + mem::size_of::<t_t_l!($ty)>(), |
| 568 | + ); |
| 569 | + |
| 570 | + r.assume_init() |
| 571 | + } |
| 572 | + |
| 573 | + impl VectorXl for *const $ty { |
| 574 | + type Result = t_t_l!($ty); |
| 575 | + #[inline] |
| 576 | + #[target_feature(enable = "altivec")] |
| 577 | + unsafe fn vec_xl(self, a: isize) -> Self::Result { |
| 578 | + $fun(a, self) |
| 579 | + } |
| 580 | + } |
| 581 | + }; |
| 582 | + } |
| 583 | + |
| 584 | + impl_vec_xl! { vec_xl_i8 lxvd2x / lxv i8 } |
| 585 | + impl_vec_xl! { vec_xl_u8 lxvd2x / lxv u8 } |
| 586 | + impl_vec_xl! { vec_xl_i16 lxvd2x / lxv i16 } |
| 587 | + impl_vec_xl! { vec_xl_u16 lxvd2x / lxv u16 } |
| 588 | + impl_vec_xl! { vec_xl_i32 lxvd2x / lxv i32 } |
| 589 | + impl_vec_xl! { vec_xl_u32 lxvd2x / lxv u32 } |
| 590 | + impl_vec_xl! { vec_xl_f32 lxvd2x / lxv f32 } |
| 591 | + |
537 | 592 | test_impl! { vec_floor(a: vector_float) -> vector_float [ vfloor, vrfim / xvrspim ] }
|
538 | 593 |
|
539 | 594 | test_impl! { vec_vexptefp(a: vector_float) -> vector_float [ vexptefp, vexptefp ] }
|
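The impl_vec_xl! additions above generate one vec_xl_* function per element type. Each one performs a 16-byte load from a possibly unaligned address by byte-copying into a MaybeUninit vector, which is what lets the backend pick lxvd2x before POWER9 and lxv on POWER9 and later (per the assert_instr attributes). Below is a minimal sketch of that load pattern, written against the public core::ptr API rather than the re-declared intrinsic the patch uses as a workaround; Vec16 is a hypothetical stand-in for the t_t_l!($ty) vector type.

    use core::mem::{size_of, MaybeUninit};
    use core::ptr::copy_nonoverlapping;

    // Hypothetical stand-in for the vector type produced by t_t_l!($ty).
    type Vec16 = [u8; 16];

    unsafe fn unaligned_load(base: *const u8, offset: isize) -> Vec16 {
        let addr = base.offset(offset);
        let mut r = MaybeUninit::<Vec16>::uninit();
        // A plain byte copy places no alignment requirement on `addr`,
        // so the compiler is free to emit an unaligned VSX load.
        copy_nonoverlapping(addr, r.as_mut_ptr() as *mut u8, size_of::<Vec16>());
        r.assume_init()
    }
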
@@ -2507,6 +2562,16 @@ where
|
2507 | 2562 | p.vec_lde(off)
|
2508 | 2563 | }
|
2509 | 2564 |
|
| 2565 | +/// VSX Unaligned Load |
| 2566 | +#[inline] |
| 2567 | +#[target_feature(enable = "altivec")] |
| 2568 | +pub unsafe fn vec_xl<T>(off: isize, p: T) -> <T as sealed::VectorXl>::Result |
| 2569 | +where |
| 2570 | + T: sealed::VectorXl, |
| 2571 | +{ |
| 2572 | + p.vec_xl(off) |
| 2573 | +} |
| 2574 | + |
2510 | 2575 | /// Vector Base-2 Logarithm Estimate
|
2511 | 2576 | #[inline]
|
2512 | 2577 | #[target_feature(enable = "altivec")]
|
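A hypothetical call site for the new public wrapper, assuming a powerpc64 target with VSX support, a nightly toolchain with the relevant unstable feature enabled, and the usual re-export of these items through core::arch::powerpc64; the function name and slice argument are illustrative only.

    #[cfg(target_arch = "powerpc64")]
    use core::arch::powerpc64::{vec_xl, vector_signed_int};

    #[cfg(target_arch = "powerpc64")]
    unsafe fn load_ints(data: &[i32]) -> vector_signed_int {
        // The first argument is a byte offset added to the pointer; the
        // pointer's element type selects the returned vector type via the
        // VectorXl impl. The pointer does not have to be 16-byte aligned.
        vec_xl(0, data.as_ptr())
    }

The byte-offset-plus-pointer signature mirrors the other AltiVec load intrinsics in this module, such as vec_lde just above.
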
@@ -3302,6 +3367,24 @@ mod tests {
|
3302 | 3367 | }
|
3303 | 3368 | }
|
3304 | 3369 |
|
| 3370 | + #[simd_test(enable = "altivec")] |
| 3371 | + unsafe fn test_vec_xl() { |
| 3372 | + let pat = [ |
| 3373 | + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), |
| 3374 | + u8x16::new( |
| 3375 | + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 3376 | + ), |
| 3377 | + ]; |
| 3378 | + |
| 3379 | + for off in 0..16 { |
| 3380 | + let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off))); |
| 3381 | + for i in 0..16 { |
| 3382 | + let v = val.extract(i); |
| 3383 | + assert_eq!(off as usize + i, v as usize); |
| 3384 | + } |
| 3385 | + } |
| 3386 | + } |
| 3387 | + |
3305 | 3388 | #[simd_test(enable = "altivec")]
|
3306 | 3389 | unsafe fn test_vec_ldl() {
|
3307 | 3390 | let pat = [
|