Skip to content

Commit a4cde4a

Browse files
lu-zero authored and Amanieu committed
Add vec_xl
1 parent 75c52f8 commit a4cde4a

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

crates/core_arch/src/lib.rs

+1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
proc_macro_hygiene,
1313
stmt_expr_attributes,
1414
core_intrinsics,
15+
intrinsics,
1516
no_core,
1617
rustc_attrs,
1718
stdsimd,

crates/core_arch/src/powerpc/altivec.rs

+83
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515

1616
use crate::{
1717
core_arch::{simd::*, simd_llvm::*},
18+
mem,
1819
mem::transmute,
1920
};
2021

@@ -534,6 +535,60 @@ mod sealed {
534535

535536
impl_vec_lde! { vec_lde_f32 lvewx f32 }
536537

538+
/// Dispatch trait behind the public `vec_xl` wrapper.
///
/// It is implemented (via `impl_vec_xl!`) for each supported `*const` element
/// pointer; every impl names the vector type it loads through `Result`.
pub trait VectorXl {
539+
/// Vector type produced by the load for this pointer type.
type Result;
540+
/// Loads a vector starting at `self` displaced by `a`.
///
/// The macro-generated impls treat `a` as a byte displacement (the pointer
/// is cast to `*const u8` before it is offset).
unsafe fn vec_xl(self, a: isize) -> Self::Result;
541+
}
542+
543+
// Generates, for element type `$ty`:
//   * a free function `$fun(a, b)` that loads one `t_t_l!($ty)` vector from
//     `b` displaced by `a` bytes, and
//   * the `VectorXl` impl for `*const $ty`, which simply forwards to `$fun`.
//
// `$notpwr9` / `$pwr9` are the instruction names handed to `assert_instr` in
// test builds; which one is checked depends on whether the `power9-altivec`
// target feature is enabled.
macro_rules! impl_vec_xl {
544+
($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => {
545+
/// Loads a `t_t_l!($ty)` vector from `b` displaced by `a` bytes.
///
/// # Safety
///
/// `size_of::<t_t_l!($ty)>()` bytes are read starting at the displaced
/// address, so the caller must guarantee that whole range is valid to
/// read and that `b + a` is a valid pointer offset.
#[inline]
546+
#[target_feature(enable = "altivec")]
547+
#[cfg_attr(
548+
all(test, not(target_feature = "power9-altivec")),
549+
assert_instr($notpwr9)
550+
)]
551+
#[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))]
552+
pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) {
553+
// Cast to `*const u8` first so that `a` is a byte displacement and is
// not scaled by `size_of::<$ty>()`.
let addr = (b as *const u8).offset(a);
554+
555+
// Workaround ptr::copy_nonoverlapping not being inlined
// (the intrinsic is declared locally and called directly instead).
556+
extern "rust-intrinsic" {
557+
#[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")]
558+
#[rustc_nounwind]
559+
pub fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
560+
}
561+
562+
// The `MaybeUninit` payload type is inferred from the function's return
// type via `assume_init()` below.
let mut r = mem::MaybeUninit::uninit();
563+
564+
// Copy as `u8`, for exactly the size of the result vector: this fills
// every byte of `r` and places no alignment requirement on `addr`
// beyond that of a byte.
copy_nonoverlapping(
565+
addr,
566+
r.as_mut_ptr() as *mut u8,
567+
mem::size_of::<t_t_l!($ty)>(),
568+
);
569+
570+
// All bytes of `r` were written by the copy above.
r.assume_init()
571+
}
572+
573+
// Route the trait method to the concrete loader generated above.
impl VectorXl for *const $ty {
574+
type Result = t_t_l!($ty);
575+
#[inline]
576+
#[target_feature(enable = "altivec")]
577+
unsafe fn vec_xl(self, a: isize) -> Self::Result {
578+
// Note the argument order flip: the trait takes (pointer, offset),
// the free function takes (offset, pointer).
$fun(a, self)
579+
}
580+
}
581+
};
582+
}
583+
584+
// Instantiate the loader for the 8-, 16- and 32-bit integer element types and
// for `f32`. In test builds every instance asserts `lxvd2x` when
// `power9-altivec` is not enabled and `lxv` when it is.
impl_vec_xl! { vec_xl_i8 lxvd2x / lxv i8 }
585+
impl_vec_xl! { vec_xl_u8 lxvd2x / lxv u8 }
586+
impl_vec_xl! { vec_xl_i16 lxvd2x / lxv i16 }
587+
impl_vec_xl! { vec_xl_u16 lxvd2x / lxv u16 }
588+
impl_vec_xl! { vec_xl_i32 lxvd2x / lxv i32 }
589+
impl_vec_xl! { vec_xl_u32 lxvd2x / lxv u32 }
590+
impl_vec_xl! { vec_xl_f32 lxvd2x / lxv f32 }
591+
537592
test_impl! { vec_floor(a: vector_float) -> vector_float [ vfloor, vrfim / xvrspim ] }
538593

539594
test_impl! { vec_vexptefp(a: vector_float) -> vector_float [ vexptefp, vexptefp ] }
@@ -2507,6 +2562,16 @@ where
25072562
p.vec_lde(off)
25082563
}
25092564

2565+
/// VSX Unaligned Load
///
/// Loads a vector from `p` displaced by `off`. The call is forwarded to
/// `sealed::VectorXl::vec_xl`, so `p` must be one of the pointer types that
/// trait is implemented for, and the returned vector type is that impl's
/// `Result`. The generated impls interpret `off` as a displacement in bytes.
///
/// # Safety
///
/// The generated impls read one full result vector starting `off` bytes past
/// `p`; the caller must guarantee that whole range is valid to read.
2566+
#[inline]
2567+
#[target_feature(enable = "altivec")]
2568+
pub unsafe fn vec_xl<T>(off: isize, p: T) -> <T as sealed::VectorXl>::Result
2569+
where
2570+
T: sealed::VectorXl,
2571+
{
2572+
p.vec_xl(off)
2573+
}
2574+
25102575
/// Vector Base-2 Logarithm Estimate
25112576
#[inline]
25122577
#[target_feature(enable = "altivec")]
@@ -3302,6 +3367,24 @@ mod tests {
33023367
}
33033368
}
33043369

3370+
// Checks `vec_xl` at each of the 16 possible byte misalignments of the source
// pointer within a 16-byte vector.
#[simd_test(enable = "altivec")]
3371+
unsafe fn test_vec_xl() {
3372+
// Two adjacent vectors holding the values 0..=31 in order, so the expected
// value of lane `i` for a load starting at byte `off` is `off + i`.
let pat = [
3373+
u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
3374+
u8x16::new(
3375+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3376+
),
3377+
];
3378+
3379+
// The last iteration (`off == 15`) reads bytes 15..=30, still inside `pat`.
for off in 0..16 {
3380+
// NOTE(review): the misalignment is applied to the pointer and the
// offset argument of `vec_xl` is always 0, so a non-zero `off` argument is
// not exercised by this test — consider covering it as well.
let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off)));
3381+
for i in 0..16 {
3382+
let v = val.extract(i);
3383+
assert_eq!(off as usize + i, v as usize);
3384+
}
3385+
}
3386+
}
3387+
33053388
#[simd_test(enable = "altivec")]
33063389
unsafe fn test_vec_ldl() {
33073390
let pat = [

0 commit comments

Comments
 (0)