Skip to content

Commit e0eefe8

Browse files
lu-zero authored and alexcrichton committed
One more instruction and further refinements (rust-lang#521)
* Add more inline and target_feature decorators
* Add Vector Multiply Low and Add Unsigned Half Word
* Add Vector Multiply Sum
1 parent 2864908 commit e0eefe8

File tree

1 file changed

+286
-0
lines changed

1 file changed

+286
-0
lines changed

coresimd/powerpc/altivec.rs

+286
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,92 @@ extern "C" {
6969
#[link_name = "llvm.ppc.altivec.vmsumshs"]
7070
fn vmsumshs(
7171
a: vector_signed_short, b: vector_signed_short,c: vector_signed_int) -> vector_signed_int;
72+
#[link_name = "llvm.ppc.altivec.vmsumubm"]
73+
fn vmsumubm(
74+
a: vector_unsigned_char, b: vector_unsigned_char,c: vector_unsigned_int) -> vector_unsigned_int;
75+
#[link_name = "llvm.ppc.altivec.vmsummbm"]
76+
fn vmsummbm(
77+
a: vector_signed_char, b: vector_unsigned_char,c: vector_signed_int) -> vector_signed_int;
78+
#[link_name = "llvm.ppc.altivec.vmsumuhm"]
79+
fn vmsumuhm(
80+
a: vector_unsigned_short, b: vector_unsigned_short,c: vector_unsigned_int) -> vector_unsigned_int;
81+
#[link_name = "llvm.ppc.altivec.vmsumshm"]
82+
fn vmsumshm(
83+
a: vector_signed_short, b: vector_signed_short,c: vector_signed_int) -> vector_signed_int;
7284
}
7385

7486
mod sealed {
7587

7688
use super::*;
7789

90+
#[inline]
91+
#[target_feature(enable = "altivec")]
92+
#[cfg_attr(test, assert_instr(vmsumubm))]
93+
unsafe fn vec_vmsumubm(
94+
a: vector_unsigned_char, b: vector_unsigned_char,c: vector_unsigned_int) -> vector_unsigned_int {
95+
vmsumubm(a, b, c)
96+
}
97+
98+
#[inline]
99+
#[target_feature(enable = "altivec")]
100+
#[cfg_attr(test, assert_instr(vmsummbm))]
101+
unsafe fn vec_vmsummbm(
102+
a: vector_signed_char, b: vector_unsigned_char,c: vector_signed_int) -> vector_signed_int {
103+
vmsummbm(a, b, c)
104+
}
105+
106+
#[inline]
107+
#[target_feature(enable = "altivec")]
108+
#[cfg_attr(test, assert_instr(vmsumuhm))]
109+
unsafe fn vec_vmsumuhm(
110+
a: vector_unsigned_short, b: vector_unsigned_short,c: vector_unsigned_int) -> vector_unsigned_int {
111+
vmsumuhm(a, b, c)
112+
}
113+
114+
#[inline]
115+
#[target_feature(enable = "altivec")]
116+
#[cfg_attr(test, assert_instr(vmsumshm))]
117+
unsafe fn vec_vmsumshm(
118+
a: vector_signed_short, b: vector_signed_short,c: vector_signed_int) -> vector_signed_int {
119+
vmsumshm(a, b, c)
120+
}
121+
122+
/// Dispatch trait used by the public `vec_msum` function.
///
/// It is implemented on the type of the first operand. `B` is the type of
/// the second operand and `Other` is the accumulator type, which is also the
/// type returned.
pub trait VectorMsum<B, Other> {
    /// Combines `self` and `b` with the accumulator `c`, yielding a value of
    /// the accumulator type.
    unsafe fn vec_msum(self, b: B, c: Other) -> Other;
}
125+
126+
impl VectorMsum<vector_unsigned_char, vector_unsigned_int> for vector_unsigned_char {
127+
#[inline]
128+
#[target_feature(enable = "altivec")]
129+
unsafe fn vec_msum(self, b: vector_unsigned_char, c: vector_unsigned_int) -> vector_unsigned_int {
130+
vmsumubm(self, b, c)
131+
}
132+
}
133+
134+
impl VectorMsum<vector_unsigned_char, vector_signed_int> for vector_signed_char {
135+
#[inline]
136+
#[target_feature(enable = "altivec")]
137+
unsafe fn vec_msum(self, b: vector_unsigned_char, c: vector_signed_int) -> vector_signed_int {
138+
vmsummbm(self, b, c)
139+
}
140+
}
141+
142+
impl VectorMsum<vector_unsigned_short, vector_unsigned_int> for vector_unsigned_short {
143+
#[inline]
144+
#[target_feature(enable = "altivec")]
145+
unsafe fn vec_msum(self, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int {
146+
vmsumuhm(self, b, c)
147+
}
148+
}
149+
150+
impl VectorMsum<vector_signed_short, vector_signed_int> for vector_signed_short {
151+
#[inline]
152+
#[target_feature(enable = "altivec")]
153+
unsafe fn vec_msum(self, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int {
154+
vmsumshm(self, b, c)
155+
}
156+
}
157+
78158
#[inline]
79159
#[target_feature(enable = "altivec")]
80160
#[cfg_attr(test, assert_instr(vmsumuhs))]
@@ -96,12 +176,16 @@ mod sealed {
96176
}
97177

98178
impl VectorMsums<vector_unsigned_int> for vector_unsigned_short {
179+
#[inline]
180+
#[target_feature(enable = "altivec")]
99181
unsafe fn vec_msums(self, b: Self, c: vector_unsigned_int) -> vector_unsigned_int {
100182
vmsumuhs(self, b, c)
101183
}
102184
}
103185

104186
impl VectorMsums<vector_signed_int> for vector_signed_short {
187+
#[inline]
188+
#[target_feature(enable = "altivec")]
105189
unsafe fn vec_msums(self, b: Self, c: vector_signed_int) -> vector_signed_int {
106190
vmsumshs(self, b, c)
107191
}
@@ -430,6 +514,40 @@ mod sealed {
430514
vec_add_float_float(self, other)
431515
}
432516
}
517+
518+
/// Dispatch trait used by the public `vec_mladd` function.
///
/// It is implemented on the type of the first operand; `Other` is the single
/// type shared by the second and third operands.
pub trait VectorMladd<Other> {
    /// Vector type produced for this combination of operand types.
    type Result;

    /// Combines `self` with `b` and `c` and returns a `Self::Result`.
    unsafe fn vec_mladd(self, b: Other, c: Other) -> Self::Result;
}
522+
523+
#[inline]
524+
#[target_feature(enable = "altivec")]
525+
#[cfg_attr(test, assert_instr(vmladduhm))]
526+
unsafe fn mladd(a: i16x8, b: i16x8, c: i16x8) -> i16x8 {
527+
simd_add(simd_mul(a, b), c)
528+
}
529+
530+
macro_rules! vector_mladd {
531+
($a: ident, $bc: ident, $d: ident) => {
532+
impl VectorMladd<$bc> for $a {
533+
type Result = $d;
534+
#[inline]
535+
#[target_feature(enable = "altivec")]
536+
unsafe fn vec_mladd(self, b: $bc, c: $bc) -> Self::Result {
537+
let a: i16x8 = ::mem::transmute(self);
538+
let b: i16x8 = ::mem::transmute(b);
539+
let c: i16x8 = ::mem::transmute(c);
540+
541+
::mem::transmute(mladd(a, b, c))
542+
}
543+
}
544+
}
545+
}
546+
547+
vector_mladd! { vector_unsigned_short, vector_unsigned_short, vector_unsigned_short }
548+
vector_mladd! { vector_unsigned_short, vector_signed_short, vector_signed_short }
549+
vector_mladd! { vector_signed_short, vector_unsigned_short, vector_signed_short }
550+
vector_mladd! { vector_signed_short, vector_signed_short, vector_signed_short }
433551
}
434552

435553
/// Vector add.
@@ -476,6 +594,15 @@ pub unsafe fn vec_madds(
476594
vmhaddshs(a, b, c)
477595
}
478596

597+
/// Vector Multiply Low and Add Unsigned Half Word
598+
#[inline]
599+
#[target_feature(enable = "altivec")]
600+
pub unsafe fn vec_mladd<T, U>(a: T, b: U, c: U) -> <T as sealed::VectorMladd<U>>::Result
601+
where
602+
T: sealed::VectorMladd<U> {
603+
a.vec_mladd(b, c)
604+
}
605+
479606
/// Vector Multiply Round and Add Saturated
480607
#[inline]
481608
#[target_feature(enable = "altivec")]
@@ -486,6 +613,14 @@ pub unsafe fn vec_mradds(
486613
vmhraddshs(a, b, c)
487614
}
488615

616+
/// Vector Multiply Sum
617+
#[inline]
618+
#[target_feature(enable = "altivec")]
619+
pub unsafe fn vec_msum<T, B, U>(a: T, b: B, c: U) -> U
620+
where T: sealed::VectorMsum<B, U> {
621+
a.vec_msum(b, c)
622+
}
623+
489624
/// Vector Multiply Sum Saturated
490625
#[inline]
491626
#[target_feature(enable = "altivec")]
@@ -655,6 +790,157 @@ mod tests {
655790
assert_eq!(d, ::mem::transmute(vec_mradds(a, b, c)));
656791
}
657792

793+
// Generates one `#[simd_test]` for `vec_mladd`.
//
// Arguments, in order: test name; portable and vector type of `a`; portable
// and vector type shared by `b` and `c`; portable type of the expected
// result; then the lane values of `a`, `b`, `c` and the expected result.
macro_rules! test_vec_mladd {
    {$test:ident, $a_simd:ident, $a_vec:ident, $bc_simd:ident, $bc_vec:ident, $out_simd:ident,
     [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
        #[simd_test(enable = "altivec")]
        unsafe fn $test() {
            let expected = $out_simd::new($($d),+);

            let a: $a_vec = ::mem::transmute($a_simd::new($($a),+));
            let b: $bc_vec = ::mem::transmute($bc_simd::new($($b),+));
            // `vec_mladd` requires `c` to have the same type as `b`.
            let c: $bc_vec = ::mem::transmute($bc_simd::new($($c),+));

            assert_eq!(expected, ::mem::transmute(vec_mladd(a, b, c)));
        }
    }
}
807+
808+
test_vec_mladd! { test_vec_mladd_u16x8_u16x8, u16x8, vector_unsigned_short, u16x8, vector_unsigned_short, u16x8,
809+
[0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
810+
}
811+
test_vec_mladd! { test_vec_mladd_u16x8_i16x8, u16x8, vector_unsigned_short, i16x8, vector_unsigned_short, i16x8,
812+
[0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
813+
}
814+
test_vec_mladd! { test_vec_mladd_i16x8_u16x8, i16x8, vector_signed_short, u16x8, vector_unsigned_short, i16x8,
815+
[0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
816+
}
817+
test_vec_mladd! { test_vec_mladd_i16x8_i16x8, i16x8, vector_signed_short, i16x8, vector_unsigned_short, i16x8,
818+
[0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
819+
}
820+
821+
#[simd_test(enable = "altivec")]
822+
unsafe fn test_vec_msum_unsigned_char() {
823+
let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
824+
0,
825+
1,
826+
2,
827+
3,
828+
829+
4,
830+
5,
831+
6,
832+
7,
833+
834+
0,
835+
1,
836+
2,
837+
3,
838+
839+
4,
840+
5,
841+
6,
842+
7,
843+
));
844+
let b: vector_unsigned_char =
845+
::mem::transmute(u8x16::new(255, 255, 255, 255, 255, 255, 255, 255,
846+
255, 255, 255, 255, 255, 255, 255, 255));
847+
let c: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
848+
let d = u32x4::new(
849+
(0 + 1 + 2 + 3) * 255 + 0,
850+
(4 + 5 + 6 + 7) * 255 + 1,
851+
(0 + 1 + 2 + 3) * 255 + 2,
852+
(4 + 5 + 6 + 7) * 255 + 3,
853+
);
854+
855+
assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
856+
}
857+
858+
#[simd_test(enable = "altivec")]
859+
unsafe fn test_vec_msum_signed_char() {
860+
let a: vector_signed_char = ::mem::transmute(i8x16::new(
861+
0,
862+
-1,
863+
2,
864+
-3,
865+
866+
1,
867+
-1,
868+
1,
869+
-1,
870+
871+
0,
872+
1,
873+
2,
874+
3,
875+
876+
4,
877+
-5,
878+
-6,
879+
-7,
880+
));
881+
let b: vector_unsigned_char =
882+
::mem::transmute(i8x16::new(1, 1, 1, 1, 1, 1, 1, 1,
883+
1, 1, 1, 1, 1, 1, 1, 1));
884+
let c: vector_signed_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
885+
let d = i32x4::new(
886+
(0 - 1 + 2 - 3) + 0,
887+
(0) + 1,
888+
(0 + 1 + 2 + 3) + 2,
889+
(4 - 5 - 6 - 7) + 3,
890+
);
891+
892+
assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
893+
}
894+
#[simd_test(enable = "altivec")]
895+
unsafe fn test_vec_msum_unsigned_short() {
896+
let a: vector_unsigned_short = ::mem::transmute(u16x8::new(
897+
0 * 256,
898+
1 * 256,
899+
2 * 256,
900+
3 * 256,
901+
4 * 256,
902+
5 * 256,
903+
6 * 256,
904+
7 * 256,
905+
));
906+
let b: vector_unsigned_short =
907+
::mem::transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
908+
let c: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
909+
let d = u32x4::new(
910+
(0 + 1) * 256 * 256 + 0,
911+
(2 + 3) * 256 * 256 + 1,
912+
(4 + 5) * 256 * 256 + 2,
913+
(6 + 7) * 256 * 256 + 3,
914+
);
915+
916+
assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
917+
}
918+
919+
#[simd_test(enable = "altivec")]
920+
unsafe fn test_vec_msum_signed_short() {
921+
let a: vector_signed_short = ::mem::transmute(i16x8::new(
922+
0 * 256,
923+
-1 * 256,
924+
2 * 256,
925+
-3 * 256,
926+
4 * 256,
927+
-5 * 256,
928+
6 * 256,
929+
-7 * 256,
930+
));
931+
let b: vector_signed_short =
932+
::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
933+
let c: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
934+
let d = i32x4::new(
935+
(0 - 1) * 256 * 256 + 0,
936+
(2 - 3) * 256 * 256 + 1,
937+
(4 - 5) * 256 * 256 + 2,
938+
(6 - 7) * 256 * 256 + 3,
939+
);
940+
941+
assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
942+
}
943+
658944
#[simd_test(enable = "altivec")]
659945
unsafe fn test_vec_msums_unsigned() {
660946
let a: vector_unsigned_short = ::mem::transmute(u16x8::new(

0 commit comments

Comments
 (0)