Skip to content

Commit 417add6

Browse files
lu-zero authored and alexcrichton committed
Add more mixed arithmetic instructions (rust-lang#541)
* Add Vector Multiply Add * Add Vector Negative Multiply Subtract * Add Vector Sum Across Partial (1/2) Saturated * Add Vector Sum Across Partial (1/4) Saturated
1 parent 3e2efe9 commit 417add6

File tree

1 file changed

+262
-0
lines changed

1 file changed

+262
-0
lines changed

coresimd/powerpc/altivec.rs

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,99 @@ extern "C" {
8181
#[link_name = "llvm.ppc.altivec.vmsumshm"]
8282
fn vmsumshm(
8383
a: vector_signed_short, b: vector_signed_short,c: vector_signed_int) -> vector_signed_int;
84+
#[link_name = "llvm.ppc.altivec.vmaddfp"]
85+
fn vmaddfp(
86+
a: vector_float, b: vector_float, c: vector_float) -> vector_float;
87+
#[link_name = "llvm.ppc.altivec.vnmsubfp"]
88+
fn vnmsubfp(
89+
a: vector_float, b: vector_float, c: vector_float) -> vector_float;
90+
#[link_name = "llvm.ppc.altivec.vsum2sws"]
91+
fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
92+
#[link_name = "llvm.ppc.altivec.vsum4ubs"]
93+
fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int;
94+
#[link_name = "llvm.ppc.altivec.vsum4sbs"]
95+
fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int;
96+
#[link_name = "llvm.ppc.altivec.vsum4shs"]
97+
fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int;
8498
}
8599

86100
mod sealed {
87101

88102
use super::*;
89103

104+
#[inline]
105+
#[target_feature(enable = "altivec")]
106+
#[cfg_attr(test, assert_instr(vsum4ubs))]
107+
unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int {
108+
vsum4ubs(a, b)
109+
}
110+
111+
#[inline]
112+
#[target_feature(enable = "altivec")]
113+
#[cfg_attr(test, assert_instr(vsum4sbs))]
114+
unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int {
115+
vsum4sbs(a, b)
116+
}
117+
118+
#[inline]
119+
#[target_feature(enable = "altivec")]
120+
#[cfg_attr(test, assert_instr(vsum4shs))]
121+
unsafe fn vec_vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int {
122+
vsum4shs(a, b)
123+
}
124+
125+
126+
/// Dispatch trait behind the public `vec_sum4s` function.
///
/// Implemented on the narrow-element vector type (`Self`); `Other` is the
/// accumulator vector type that is both taken as the second operand and
/// returned.
pub trait VectorSum4s<Other> {
    /// Sums groups of adjacent elements of `self` into the lanes of `b`.
    ///
    /// # Safety
    ///
    /// Implementations are compiled with the `altivec` target feature; the
    /// caller must ensure it is available.
    unsafe fn vec_sum4s(self, b: Other) -> Other;
}
129+
130+
impl VectorSum4s<vector_unsigned_int> for vector_unsigned_char {
131+
#[inline]
132+
#[target_feature(enable = "altivec")]
133+
unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int {
134+
vsum4ubs(self, b)
135+
}
136+
}
137+
138+
impl VectorSum4s<vector_signed_int> for vector_signed_char {
139+
#[inline]
140+
#[target_feature(enable = "altivec")]
141+
unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
142+
vsum4sbs(self, b)
143+
}
144+
}
145+
146+
impl VectorSum4s<vector_signed_int> for vector_signed_short {
147+
#[inline]
148+
#[target_feature(enable = "altivec")]
149+
unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
150+
vsum4shs(self, b)
151+
}
152+
}
153+
154+
#[inline]
155+
#[target_feature(enable = "altivec")]
156+
#[cfg_attr(test, assert_instr(vsum2sws))]
157+
unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
158+
vsum2sws(a, b)
159+
}
160+
161+
#[inline]
162+
#[target_feature(enable = "altivec")]
163+
#[cfg_attr(test, assert_instr(vnmsubfp))]
164+
unsafe fn vec_vnmsubfp(
165+
a: vector_float, b: vector_float, c: vector_float) -> vector_float {
166+
vnmsubfp(a, b, c)
167+
}
168+
169+
#[inline]
170+
#[target_feature(enable = "altivec")]
171+
#[cfg_attr(test, assert_instr(vmaddfp))]
172+
unsafe fn vec_vmaddfp(
173+
a: vector_float, b: vector_float, c: vector_float) -> vector_float {
174+
vmaddfp(a, b, c)
175+
}
176+
90177
#[inline]
91178
#[target_feature(enable = "altivec")]
92179
#[cfg_attr(test, assert_instr(vmsumubm))]
@@ -582,6 +669,20 @@ mod endian {
582669

583670
b.vec_vperm(a, c)
584671
}
672+
673+
/// Vector Sum Across Partial (1/2) Saturated
674+
#[inline]
675+
#[target_feature(enable = "altivec")]
676+
pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
677+
// vsum2sws has big-endian bias
678+
//
679+
// swap the even b elements with the odd ones
680+
let flip = ::mem::transmute(u8x16::new(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11));
681+
let b = vec_perm(b, b, flip);
682+
let c = vsum2sws(a, b);
683+
684+
vec_perm(c, c, flip)
685+
}
585686
}
586687

587688
/// Vector Multiply Add Saturated
@@ -629,6 +730,29 @@ pub unsafe fn vec_msums<T, U>(a: T, b: T, c: U) -> U
629730
a.vec_msums(b, c)
630731
}
631732

733+
/// Vector Multiply Add
734+
#[inline]
735+
#[target_feature(enable = "altivec")]
736+
pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
737+
vmaddfp(a, b, c)
738+
}
739+
740+
/// Vector Negative Multiply Subtract
741+
#[inline]
742+
#[target_feature(enable = "altivec")]
743+
pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
744+
vnmsubfp(a, b, c)
745+
}
746+
747+
/// Vector Sum Across Partial (1/4) Saturated
748+
#[inline]
749+
#[target_feature(enable = "altivec")]
750+
pub unsafe fn vec_sum4s<T, U>(a: T, b: U) -> U
751+
where
752+
T: sealed::VectorSum4s<U> {
753+
a.vec_sum4s(b)
754+
}
755+
632756
#[cfg(target_endian = "big")]
633757
mod endian {
634758
use super::*;
@@ -641,6 +765,13 @@ mod endian {
641765
{
642766
a.vec_vperm(b, c)
643767
}
768+
769+
/// Vector Sum Across Partial (1/2) Saturated
770+
#[inline]
771+
#[target_feature(enable = "altivec")]
772+
pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
773+
vsum2sws(a, b)
774+
}
644775
}
645776

646777
pub use self::endian::*;
@@ -768,6 +899,34 @@ mod tests {
768899
assert_eq!(d, ::mem::transmute(vec_madds(a, b, c)));
769900
}
770901

902+
#[simd_test(enable = "altivec")]
903+
unsafe fn test_vec_madd_float() {
904+
let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
905+
let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
906+
let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
907+
let d = f32x4::new(
908+
0.1 * 0.1 + 0.1,
909+
0.2 * 0.2 + 0.2,
910+
0.3 * 0.3 + 0.3,
911+
0.4 * 0.4 + 0.4);
912+
913+
assert_eq!(d, ::mem::transmute(vec_madd(a, b, c)));
914+
}
915+
916+
#[simd_test(enable = "altivec")]
917+
unsafe fn test_vec_nmsub_float() {
918+
let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
919+
let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
920+
let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
921+
let d = f32x4::new(
922+
-(0.1 * 0.1 - 0.1),
923+
-(0.2 * 0.2 - 0.2),
924+
-(0.3 * 0.3 - 0.3),
925+
-(0.4 * 0.4 - 0.4),
926+
);
927+
assert_eq!(d, ::mem::transmute(vec_nmsub(a, b, c)));
928+
}
929+
771930
#[simd_test(enable = "altivec")]
772931
unsafe fn test_vec_mradds() {
773932
let a: vector_signed_short = ::mem::transmute(i16x8::new(
@@ -991,6 +1150,109 @@ mod tests {
9911150
assert_eq!(d, ::mem::transmute(vec_msums(a, b, c)));
9921151
}
9931152

1153+
#[simd_test(enable = "altivec")]
1154+
unsafe fn test_vec_sum2s() {
1155+
let a: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
1156+
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
1157+
let d = i32x4::new(
1158+
0,
1159+
0 + 1 + 1,
1160+
0,
1161+
2 + 3 + 3);
1162+
1163+
assert_eq!(d, ::mem::transmute(vec_sum2s(a, b)));
1164+
}
1165+
1166+
#[simd_test(enable = "altivec")]
1167+
unsafe fn test_vec_sum4s_unsigned_char() {
1168+
let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
1169+
0,
1170+
1,
1171+
2,
1172+
3,
1173+
1174+
4,
1175+
5,
1176+
6,
1177+
7,
1178+
1179+
0,
1180+
1,
1181+
2,
1182+
3,
1183+
1184+
4,
1185+
5,
1186+
6,
1187+
7,
1188+
));
1189+
let b: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
1190+
let d = u32x4::new(
1191+
0 + 1 + 2 + 3 + 0,
1192+
4 + 5 + 6 + 7 + 1,
1193+
0 + 1 + 2 + 3 + 2,
1194+
4 + 5 + 6 + 7 + 3,
1195+
);
1196+
1197+
assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
1198+
}
1199+
#[simd_test(enable = "altivec")]
1200+
unsafe fn test_vec_sum4s_signed_char() {
1201+
let a: vector_signed_char = ::mem::transmute(i8x16::new(
1202+
0,
1203+
1,
1204+
2,
1205+
3,
1206+
1207+
4,
1208+
5,
1209+
6,
1210+
7,
1211+
1212+
0,
1213+
1,
1214+
2,
1215+
3,
1216+
1217+
4,
1218+
5,
1219+
6,
1220+
7,
1221+
));
1222+
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
1223+
let d = i32x4::new(
1224+
0 + 1 + 2 + 3 + 0,
1225+
4 + 5 + 6 + 7 + 1,
1226+
0 + 1 + 2 + 3 + 2,
1227+
4 + 5 + 6 + 7 + 3,
1228+
);
1229+
1230+
assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
1231+
}
1232+
#[simd_test(enable = "altivec")]
1233+
unsafe fn test_vec_sum4s_signed_short() {
1234+
let a: vector_signed_short = ::mem::transmute(i16x8::new(
1235+
0,
1236+
1,
1237+
2,
1238+
3,
1239+
1240+
4,
1241+
5,
1242+
6,
1243+
7,
1244+
));
1245+
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
1246+
let d = i32x4::new(
1247+
0 + 1 + 0,
1248+
2 + 3 + 1,
1249+
4 + 5 + 2,
1250+
6 + 7 + 3,
1251+
);
1252+
1253+
assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
1254+
}
1255+
9941256
#[simd_test(enable = "altivec")]
9951257
unsafe fn vec_add_i32x4_i32x4() {
9961258
let x = i32x4::new(1, 2, 3, 4);

0 commit comments

Comments
 (0)