|
15 | 15 |
|
16 | 16 | use crate::{
|
17 | 17 | core_arch::{simd::*, simd_llvm::*},
|
| 18 | + mem, |
18 | 19 | mem::transmute,
|
19 | 20 | };
|
20 | 21 |
|
@@ -534,6 +535,60 @@ mod sealed {
|
534 | 535 |
|
535 | 536 | impl_vec_lde! { vec_lde_f32 lvewx f32 }
|
536 | 537 |
|
| 538 | + pub trait VectorXl { |
| 539 | + type Result; |
| 540 | + unsafe fn vec_xl(self, a: isize) -> Self::Result; |
| 541 | + } |
| 542 | + |
| 543 | + macro_rules! impl_vec_xl { |
| 544 | + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { |
| 545 | + #[inline] |
| 546 | + #[target_feature(enable = "altivec")] |
| 547 | + #[cfg_attr( |
| 548 | + all(test, not(target_feature = "power9-altivec")), |
| 549 | + assert_instr($notpwr9) |
| 550 | + )] |
| 551 | + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] |
| 552 | + pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) { |
| 553 | + let addr = (b as *const u8).offset(a); |
| 554 | + |
| 555 | + // Workaround for ptr::copy_nonoverlapping not being inlined |
| 556 | + extern "rust-intrinsic" { |
| 557 | + #[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.63.0")] |
| 558 | + #[rustc_nounwind] |
| 559 | + pub fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize); |
| 560 | + } |
| 561 | + |
| 562 | + let mut r = mem::MaybeUninit::uninit(); |
| 563 | + |
| 564 | + copy_nonoverlapping( |
| 565 | + addr, |
| 566 | + r.as_mut_ptr() as *mut u8, |
| 567 | + mem::size_of::<t_t_l!($ty)>(), |
| 568 | + ); |
| 569 | + |
| 570 | + r.assume_init() |
| 571 | + } |
| 572 | + |
| 573 | + impl VectorXl for *const $ty { |
| 574 | + type Result = t_t_l!($ty); |
| 575 | + #[inline] |
| 576 | + #[target_feature(enable = "altivec")] |
| 577 | + unsafe fn vec_xl(self, a: isize) -> Self::Result { |
| 578 | + $fun(a, self) |
| 579 | + } |
| 580 | + } |
| 581 | + }; |
| 582 | + } |
| 583 | + |
| 584 | + impl_vec_xl! { vec_xl_i8 lxvd2x / lxv i8 } |
| 585 | + impl_vec_xl! { vec_xl_u8 lxvd2x / lxv u8 } |
| 586 | + impl_vec_xl! { vec_xl_i16 lxvd2x / lxv i16 } |
| 587 | + impl_vec_xl! { vec_xl_u16 lxvd2x / lxv u16 } |
| 588 | + impl_vec_xl! { vec_xl_i32 lxvd2x / lxv i32 } |
| 589 | + impl_vec_xl! { vec_xl_u32 lxvd2x / lxv u32 } |
| 590 | + impl_vec_xl! { vec_xl_f32 lxvd2x / lxv f32 } |
| 591 | + |
537 | 592 | test_impl! { vec_floor(a: vector_float) -> vector_float [ vfloor, vrfim / xvrspim ] }
|
538 | 593 |
|
539 | 594 | test_impl! { vec_vexptefp(a: vector_float) -> vector_float [ vexptefp, vexptefp ] }
|
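The impl_vec_xl! additions above generate one vec_xl_* function per element type. Each one performs a 16-byte load from a possibly unaligned address by byte-copying into a MaybeUninit vector, which is what lets the backend pick lxvd2x before POWER9 and lxv on POWER9 and later (per the assert_instr attributes). Below is a minimal sketch of that load pattern, written against the public core::ptr API rather than the re-declared intrinsic the patch uses as a workaround; Vec16 is a hypothetical stand-in for the t_t_l!($ty) vector type.

    use core::mem::{size_of, MaybeUninit};
    use core::ptr::copy_nonoverlapping;

    // Hypothetical stand-in for the vector type produced by t_t_l!($ty).
    type Vec16 = [u8; 16];

    unsafe fn unaligned_load(base: *const u8, offset: isize) -> Vec16 {
        let addr = base.offset(offset);
        let mut r = MaybeUninit::<Vec16>::uninit();
        // A plain byte copy places no alignment requirement on `addr`,
        // so the compiler is free to emit an unaligned VSX load.
        copy_nonoverlapping(addr, r.as_mut_ptr() as *mut u8, size_of::<Vec16>());
        r.assume_init()
    }
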
@@ -2507,6 +2562,16 @@ where
|
2507 | 2562 | p.vec_lde(off)
|
2508 | 2563 | }
|
2509 | 2564 |
|
| 2565 | +/// VSX Unaligned Load |
| 2566 | +#[inline] |
| 2567 | +#[target_feature(enable = "altivec")] |
| 2568 | +pub unsafe fn vec_xl<T>(off: isize, p: T) -> <T as sealed::VectorXl>::Result |
| 2569 | +where |
| 2570 | + T: sealed::VectorXl, |
| 2571 | +{ |
| 2572 | + p.vec_xl(off) |
| 2573 | +} |
| 2574 | + |
2510 | 2575 | /// Vector Base-2 Logarithm Estimate
|
2511 | 2576 | #[inline]
|
2512 | 2577 | #[target_feature(enable = "altivec")]
|
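A hypothetical call site for the new public wrapper, assuming a powerpc64 target with VSX support, a nightly toolchain with the relevant unstable feature enabled, and the usual re-export of these items through core::arch::powerpc64; the function name and slice argument are illustrative only.

    #[cfg(target_arch = "powerpc64")]
    use core::arch::powerpc64::{vec_xl, vector_signed_int};

    #[cfg(target_arch = "powerpc64")]
    unsafe fn load_ints(data: &[i32]) -> vector_signed_int {
        // The first argument is a byte offset added to the pointer; the
        // pointer's element type selects the returned vector type via the
        // VectorXl impl. The pointer does not have to be 16-byte aligned.
        vec_xl(0, data.as_ptr())
    }

The byte-offset-plus-pointer signature mirrors the other AltiVec load intrinsics in this module, such as vec_lde just above.
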
@@ -3302,6 +3367,24 @@ mod tests {
|
3302 | 3367 | }
|
3303 | 3368 | }
|
3304 | 3369 |
|
| 3370 | + #[simd_test(enable = "altivec")] |
| 3371 | + unsafe fn test_vec_xl() { |
| 3372 | + let pat = [ |
| 3373 | + u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), |
| 3374 | + u8x16::new( |
| 3375 | + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 3376 | + ), |
| 3377 | + ]; |
| 3378 | + |
| 3379 | + for off in 0..16 { |
| 3380 | + let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off))); |
| 3381 | + for i in 0..16 { |
| 3382 | + let v = val.extract(i); |
| 3383 | + assert_eq!(off as usize + i, v as usize); |
| 3384 | + } |
| 3385 | + } |
| 3386 | + } |
| 3387 | + |
3305 | 3388 | #[simd_test(enable = "altivec")]
|
3306 | 3389 | unsafe fn test_vec_ldl() {
|
3307 | 3390 | let pat = [
|