@@ -81,12 +81,99 @@ extern "C" {
     #[link_name = "llvm.ppc.altivec.vmsumshm"]
     fn vmsumshm(
         a: vector_signed_short, b: vector_signed_short, c: vector_signed_int,
     ) -> vector_signed_int;
+    #[link_name = "llvm.ppc.altivec.vmaddfp"]
+    fn vmaddfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float;
+    #[link_name = "llvm.ppc.altivec.vnmsubfp"]
+    fn vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float;
+    #[link_name = "llvm.ppc.altivec.vsum2sws"]
+    fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
+    #[link_name = "llvm.ppc.altivec.vsum4ubs"]
+    fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int;
+    #[link_name = "llvm.ppc.altivec.vsum4sbs"]
+    fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int;
+    #[link_name = "llvm.ppc.altivec.vsum4shs"]
+    fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int;
 }
 
 mod sealed {
 
     use super::*;
 
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vsum4ubs))]
+    unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int {
+        vsum4ubs(a, b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vsum4sbs))]
+    unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int {
+        vsum4sbs(a, b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vsum4shs))]
+    unsafe fn vec_vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int {
+        vsum4shs(a, b)
+    }
+
+    pub trait VectorSum4s<Other> {
+        unsafe fn vec_sum4s(self, b: Other) -> Other;
+    }
+
+    impl VectorSum4s<vector_unsigned_int> for vector_unsigned_char {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int {
+            vsum4ubs(self, b)
+        }
+    }
+
+    impl VectorSum4s<vector_signed_int> for vector_signed_char {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
+            vsum4sbs(self, b)
+        }
+    }
+
+    impl VectorSum4s<vector_signed_int> for vector_signed_short {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
+            vsum4shs(self, b)
+        }
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vsum2sws))]
+    unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        vsum2sws(a, b)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vnmsubfp))]
+    unsafe fn vec_vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+        vnmsubfp(a, b, c)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vmaddfp))]
+    unsafe fn vec_vmaddfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+        vmaddfp(a, b, c)
+    }
+
     #[inline]
     #[target_feature(enable = "altivec")]
     #[cfg_attr(test, assert_instr(vmsumubm))]
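The `sealed` module is how this crate emulates C-style overloading: the public `vec_sum4s` accepts three input/accumulator type pairings, each dispatching to a different machine intrinsic through a trait that callers cannot implement themselves. Here is a minimal self-contained sketch of that pattern using plain arrays instead of AltiVec vector types; the names and the scalar reduction body are illustrative only, not part of the patch:

```rust
// Sketch of the sealed-trait overloading pattern (illustrative, not the patch):
// one public generic entry point, concrete behavior chosen per input type.
mod sealed {
    pub trait Sum4s<Other> {
        fn sum4s(self, acc: Other) -> Other;
    }

    // Four u8 lanes fold into each u32 accumulator lane,
    // mirroring the vector_unsigned_char -> vector_unsigned_int impl above.
    impl Sum4s<[u32; 4]> for [u8; 16] {
        fn sum4s(self, acc: [u32; 4]) -> [u32; 4] {
            let mut out = acc;
            for (i, chunk) in self.chunks(4).enumerate() {
                out[i] += chunk.iter().map(|&x| u32::from(x)).sum::<u32>();
            }
            out
        }
    }
}

// Public front-end, like `vec_sum4s` later in this patch:
// callers never name the trait, they just pass supported types.
pub fn sum4s<T, U>(a: T, b: U) -> U
where
    T: sealed::Sum4s<U>,
{
    a.sum4s(b)
}

fn main() {
    let a = [1u8; 16];
    assert_eq!(sum4s(a, [0u32; 4]), [4, 4, 4, 4]);
}
```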
@@ -582,6 +669,20 @@ mod endian {
         b.vec_vperm(a, c)
     }
+
+    /// Vector Sum Across Partial (1/2) Saturated
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        // vsum2sws has big-endian bias
+        //
+        // swap the even b elements with the odd ones
+        let flip = ::mem::transmute(u8x16::new(
+            4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11,
+        ));
+        let b = vec_perm(b, b, flip);
+        let c = vsum2sws(a, b);
+
+        vec_perm(c, c, flip)
+    }
 }
 
 /// Vector Multiply Add Saturated
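The double permute above compensates for `vsum2sws` defining its word pairs in big-endian element order: viewed from little-endian code, the pairwise sums land in elements 0 and 2 rather than 1 and 3, and the instruction reads the "wrong" element of each pair of `b`. A scalar model makes the fix checkable; this is a sketch for exposition only, not code from the patch:

```rust
// What vsum2sws computes, as seen from little-endian lane order:
// saturated pair sums end up in elements 0 and 2.
fn vsum2sws_le_view(a: [i32; 4], b: [i32; 4]) -> [i32; 4] {
    [
        a[0].saturating_add(a[1]).saturating_add(b[0]),
        0,
        a[2].saturating_add(a[3]).saturating_add(b[2]),
        0,
    ]
}

// The u8x16 pattern (4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11)
// swaps the two 32-bit words of each pair.
fn flip(v: [i32; 4]) -> [i32; 4] {
    [v[1], v[0], v[3], v[2]]
}

fn main() {
    let (a, b) = ([0, 1, 2, 3], [0, 1, 2, 3]);
    // Flip b going in and the result coming out, exactly as vec_sum2s does,
    // recovering the big-endian semantics: sums in elements 1 and 3.
    let c = flip(vsum2sws_le_view(a, flip(b)));
    assert_eq!(c, [0, 0 + 1 + 1, 0, 2 + 3 + 3]);
}
```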
@@ -629,6 +730,29 @@ pub unsafe fn vec_msums<T, U>(a: T, b: T, c: U) -> U
     a.vec_msums(b, c)
 }
 
+/// Vector Multiply Add
+#[inline]
+#[target_feature(enable = "altivec")]
+pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+    vmaddfp(a, b, c)
+}
+
+/// Vector Negative Multiply Subtract
+#[inline]
+#[target_feature(enable = "altivec")]
+pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+    vnmsubfp(a, b, c)
+}
+
+/// Vector Sum Across Partial (1/4) Saturated
+#[inline]
+#[target_feature(enable = "altivec")]
+pub unsafe fn vec_sum4s<T, U>(a: T, b: U) -> U
+where
+    T: sealed::VectorSum4s<U>,
+{
+    a.vec_sum4s(b)
+}
+
 #[cfg(target_endian = "big")]
 mod endian {
     use super::*;
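Per lane, `vec_madd` computes `a * b + c` and `vec_nmsub` computes `-(a * b - c)`. A scalar sketch of those semantics, using `f32::mul_add` as a stand-in for the fused operation (illustrative only; the real intrinsics operate on whole `vector_float` registers):

```rust
// Per-lane reference semantics for vec_madd / vec_nmsub (sketch, not the patch).
fn madd_lane(a: f32, b: f32, c: f32) -> f32 {
    // a * b + c with a single rounding, like vmaddfp
    a.mul_add(b, c)
}

fn nmsub_lane(a: f32, b: f32, c: f32) -> f32 {
    // -(a * b - c), like vnmsubfp
    -a.mul_add(b, -c)
}

fn main() {
    println!("{}", madd_lane(0.1, 0.1, 0.1)); // ~0.11
    println!("{}", nmsub_lane(0.1, 0.1, 0.1)); // ~0.09
}
```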
@@ -641,6 +765,13 @@ mod endian {
     {
         a.vec_vperm(b, c)
     }
+
+    /// Vector Sum Across Partial (1/2) Saturated
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
+        vsum2sws(a, b)
+    }
 }
 
 pub use self::endian::*;
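Both variants of `vec_sum2s` live in modules named `endian`, gated by opposite `cfg(target_endian)` predicates and re-exported by the same glob, so callers see a single function resolved at compile time. A condensed, illustrative sketch of that arrangement (names here are made up for the demo):

```rust
// Two modules with the same name; only one is compiled for a given target.
#[cfg(target_endian = "little")]
mod endian {
    pub fn sum2s_strategy() -> &'static str {
        "little-endian: permute around the instruction"
    }
}

#[cfg(target_endian = "big")]
mod endian {
    pub fn sum2s_strategy() -> &'static str {
        "big-endian: call the instruction directly"
    }
}

// One re-export picks up whichever module exists.
pub use self::endian::*;

fn main() {
    println!("{}", sum2s_strategy());
}
```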
@@ -768,6 +899,34 @@ mod tests {
         assert_eq!(d, ::mem::transmute(vec_madds(a, b, c)));
     }
 
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_madd_float() {
+        let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let d = f32x4::new(
+            0.1 * 0.1 + 0.1,
+            0.2 * 0.2 + 0.2,
+            0.3 * 0.3 + 0.3,
+            0.4 * 0.4 + 0.4,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_madd(a, b, c)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_nmsub_float() {
+        let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let d = f32x4::new(
+            -(0.1 * 0.1 - 0.1),
+            -(0.2 * 0.2 - 0.2),
+            -(0.3 * 0.3 - 0.3),
+            -(0.4 * 0.4 - 0.4),
+        );
+        assert_eq!(d, ::mem::transmute(vec_nmsub(a, b, c)));
+    }
+
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_mradds() {
         let a: vector_signed_short = ::mem::transmute(i16x8::new(
@@ -991,6 +1150,109 @@ mod tests {
         assert_eq!(d, ::mem::transmute(vec_msums(a, b, c)));
     }
 
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_sum2s() {
+        let a: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
+        let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
+        let d = i32x4::new(0, 0 + 1 + 1, 0, 2 + 3 + 3);
+
+        assert_eq!(d, ::mem::transmute(vec_sum2s(a, b)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_sum4s_unsigned_char() {
+        let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
+            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
+        ));
+        let b: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
+        let d = u32x4::new(
+            0 + 1 + 2 + 3 + 0,
+            4 + 5 + 6 + 7 + 1,
+            0 + 1 + 2 + 3 + 2,
+            4 + 5 + 6 + 7 + 3,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_sum4s_signed_char() {
+        let a: vector_signed_char = ::mem::transmute(i8x16::new(
+            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
+        ));
+        let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
+        let d = i32x4::new(
+            0 + 1 + 2 + 3 + 0,
+            4 + 5 + 6 + 7 + 1,
+            0 + 1 + 2 + 3 + 2,
+            4 + 5 + 6 + 7 + 3,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_sum4s_signed_short() {
+        let a: vector_signed_short = ::mem::transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+        let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
+        let d = i32x4::new(0 + 1 + 0, 2 + 3 + 1, 4 + 5 + 2, 6 + 7 + 3);
+
+        assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
+    }
+
     #[simd_test(enable = "altivec")]
     unsafe fn vec_add_i32x4_i32x4() {
         let x = i32x4::new(1, 2, 3, 4);
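Note that the test values above are small, so the saturating behavior that gives `vsum2sws`, `vsum4ubs`, `vsum4sbs`, and `vsum4shs` their trailing `s` is never exercised. A scalar sketch (illustration only, not part of the patch) of what saturation means at the i32 boundary:

```rust
// Saturating accumulation clamps at the type bounds instead of wrapping,
// matching the documented behavior of the AltiVec saturated-sum instructions.
fn main() {
    let acc = i32::MAX - 1;
    // a wrapping add would overflow to a negative value; saturation clamps
    assert_eq!(acc.saturating_add(5), i32::MAX);
    assert_eq!(i32::MIN.saturating_add(-1), i32::MIN);
}
```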