@@ -69,12 +69,92 @@ extern "C" {
     #[link_name = "llvm.ppc.altivec.vmsumshs"]
     fn vmsumshs(
         a: vector_signed_short, b: vector_signed_short, c: vector_signed_int,
     ) -> vector_signed_int;
+    #[link_name = "llvm.ppc.altivec.vmsumubm"]
+    fn vmsumubm(
+        a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_int,
+    ) -> vector_unsigned_int;
+    #[link_name = "llvm.ppc.altivec.vmsummbm"]
+    fn vmsummbm(
+        a: vector_signed_char, b: vector_unsigned_char, c: vector_signed_int,
+    ) -> vector_signed_int;
+    #[link_name = "llvm.ppc.altivec.vmsumuhm"]
+    fn vmsumuhm(
+        a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int,
+    ) -> vector_unsigned_int;
+    #[link_name = "llvm.ppc.altivec.vmsumshm"]
+    fn vmsumshm(
+        a: vector_signed_short, b: vector_signed_short, c: vector_signed_int,
+    ) -> vector_signed_int;
 }

 mod sealed {

     use super::*;

+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vmsumubm))]
+    unsafe fn vec_vmsumubm(
+        a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_int,
+    ) -> vector_unsigned_int {
+        vmsumubm(a, b, c)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vmsummbm))]
+    unsafe fn vec_vmsummbm(
+        a: vector_signed_char, b: vector_unsigned_char, c: vector_signed_int,
+    ) -> vector_signed_int {
+        vmsummbm(a, b, c)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vmsumuhm))]
+    unsafe fn vec_vmsumuhm(
+        a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int,
+    ) -> vector_unsigned_int {
+        vmsumuhm(a, b, c)
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vmsumshm))]
+    unsafe fn vec_vmsumshm(
+        a: vector_signed_short, b: vector_signed_short, c: vector_signed_int,
+    ) -> vector_signed_int {
+        vmsumshm(a, b, c)
+    }
+
+    pub trait VectorMsum<B, Other> {
+        unsafe fn vec_msum(self, b: B, c: Other) -> Other;
+    }
+
+    impl VectorMsum<vector_unsigned_char, vector_unsigned_int> for vector_unsigned_char {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_msum(
+            self, b: vector_unsigned_char, c: vector_unsigned_int,
+        ) -> vector_unsigned_int {
+            vmsumubm(self, b, c)
+        }
+    }
+
+    impl VectorMsum<vector_unsigned_char, vector_signed_int> for vector_signed_char {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_msum(
+            self, b: vector_unsigned_char, c: vector_signed_int,
+        ) -> vector_signed_int {
+            vmsummbm(self, b, c)
+        }
+    }
+
+    impl VectorMsum<vector_unsigned_short, vector_unsigned_int> for vector_unsigned_short {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_msum(
+            self, b: vector_unsigned_short, c: vector_unsigned_int,
+        ) -> vector_unsigned_int {
+            vmsumuhm(self, b, c)
+        }
+    }
+
+    impl VectorMsum<vector_signed_short, vector_signed_int> for vector_signed_short {
+        #[inline]
+        #[target_feature(enable = "altivec")]
+        unsafe fn vec_msum(
+            self, b: vector_signed_short, c: vector_signed_int,
+        ) -> vector_signed_int {
+            vmsumshm(self, b, c)
+        }
+    }
+
     #[inline]
     #[target_feature(enable = "altivec")]
     #[cfg_attr(test, assert_instr(vmsumuhs))]
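For reference, each of these multiply-sum intrinsics folds all the products from one aligned 4-byte group of `a` and `b` into the matching 32-bit lane, then adds the lane of `c`. A minimal scalar sketch of the unsigned-byte case (`vmsumubm`); the helper name is ours, for illustration only:

    // Scalar model of vmsumubm: lane i of the result is the sum of the four
    // byte products a[4i..4i+4] * b[4i..4i+4], plus c[i], modulo 2^32.
    fn msum_ubm_model(a: [u8; 16], b: [u8; 16], c: [u32; 4]) -> [u32; 4] {
        let mut d = c;
        for i in 0..16 {
            d[i / 4] = d[i / 4].wrapping_add(u32::from(a[i]) * u32::from(b[i]));
        }
        d
    }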
@@ -96,12 +176,16 @@ mod sealed {
     }

     impl VectorMsums<vector_unsigned_int> for vector_unsigned_short {
+        #[inline]
+        #[target_feature(enable = "altivec")]
         unsafe fn vec_msums(self, b: Self, c: vector_unsigned_int) -> vector_unsigned_int {
             vmsumuhs(self, b, c)
         }
     }

     impl VectorMsums<vector_signed_int> for vector_signed_short {
+        #[inline]
+        #[target_feature(enable = "altivec")]
         unsafe fn vec_msums(self, b: Self, c: vector_signed_int) -> vector_signed_int {
             vmsumshs(self, b, c)
         }
     }
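The `vec_msums` family above differs from the new modular `vec_msum` only in how the final accumulation overflows. A rough scalar model of `vmsumshs`, assuming saturation is applied once to the widened sum (helper name hypothetical):

    // Scalar model of vmsumshs: two signed halfword products per lane are
    // added to c[i], and the result is clamped to the i32 range.
    fn msum_shs_model(a: [i16; 8], b: [i16; 8], c: [i32; 4]) -> [i32; 4] {
        let mut d = [0i32; 4];
        for i in 0..4 {
            let sum = i64::from(a[2 * i]) * i64::from(b[2 * i])
                + i64::from(a[2 * i + 1]) * i64::from(b[2 * i + 1])
                + i64::from(c[i]);
            d[i] = sum.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32;
        }
        d
    }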
@@ -430,6 +514,40 @@ mod sealed {
             vec_add_float_float(self, other)
         }
     }
+
+    pub trait VectorMladd<Other> {
+        type Result;
+        unsafe fn vec_mladd(self, b: Other, c: Other) -> Self::Result;
+    }
+
+    #[inline]
+    #[target_feature(enable = "altivec")]
+    #[cfg_attr(test, assert_instr(vmladduhm))]
+    unsafe fn mladd(a: i16x8, b: i16x8, c: i16x8) -> i16x8 {
+        simd_add(simd_mul(a, b), c)
+    }
+
+    macro_rules! vector_mladd {
+        ($a:ident, $bc:ident, $d:ident) => {
+            impl VectorMladd<$bc> for $a {
+                type Result = $d;
+                #[inline]
+                #[target_feature(enable = "altivec")]
+                unsafe fn vec_mladd(self, b: $bc, c: $bc) -> Self::Result {
+                    let a: i16x8 = ::mem::transmute(self);
+                    let b: i16x8 = ::mem::transmute(b);
+                    let c: i16x8 = ::mem::transmute(c);
+
+                    ::mem::transmute(mladd(a, b, c))
+                }
+            }
+        }
+    }
+
+    vector_mladd! { vector_unsigned_short, vector_unsigned_short, vector_unsigned_short }
+    vector_mladd! { vector_unsigned_short, vector_signed_short, vector_signed_short }
+    vector_mladd! { vector_signed_short, vector_unsigned_short, vector_signed_short }
+    vector_mladd! { vector_signed_short, vector_signed_short, vector_signed_short }
 }

 /// Vector add.
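Because `vmladduhm` keeps only the low 16 bits of each product, two's-complement multiply-add produces the same bit pattern for every sign combination, which is why the macro can route all four type mixes through the single `i16x8` helper. One lane of the same arithmetic in scalar form (illustrative only):

    // One lane of mladd: multiply-low then add, both wrapping mod 2^16.
    fn mladd_lane(a: i16, b: i16, c: i16) -> i16 {
        a.wrapping_mul(b).wrapping_add(c)
    }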
@@ -476,6 +594,15 @@ pub unsafe fn vec_madds(
     vmhaddshs(a, b, c)
 }

+/// Vector Multiply Low and Add Unsigned Half Word
+#[inline]
+#[target_feature(enable = "altivec")]
+pub unsafe fn vec_mladd<T, U>(a: T, b: U, c: U) -> <T as sealed::VectorMladd<U>>::Result
+where
+    T: sealed::VectorMladd<U>,
+{
+    a.vec_mladd(b, c)
+}
+
 /// Vector Multiply Round and Add Saturated
 #[inline]
 #[target_feature(enable = "altivec")]
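A usage sketch for the new `vec_mladd` wrapper, borrowing the `::mem::transmute` idiom from the tests below (the values are arbitrary):

    unsafe {
        let a: vector_unsigned_short = ::mem::transmute(u16x8::new(1, 2, 3, 4, 5, 6, 7, 8));
        let b: vector_unsigned_short = ::mem::transmute(u16x8::new(2, 2, 2, 2, 2, 2, 2, 2));
        let c: vector_unsigned_short = ::mem::transmute(u16x8::new(1, 1, 1, 1, 1, 1, 1, 1));
        // Resolves through the unsigned/unsigned impl: lanes 3, 5, 7, ..., 17.
        let d: vector_unsigned_short = vec_mladd(a, b, c);
    }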
@@ -486,6 +613,14 @@ pub unsafe fn vec_mradds(
     vmhraddshs(a, b, c)
 }

+/// Vector Multiply Sum
+#[inline]
+#[target_feature(enable = "altivec")]
+pub unsafe fn vec_msum<T, B, U>(a: T, b: B, c: U) -> U
+where
+    T: sealed::VectorMsum<B, U>,
+{
+    a.vec_msum(b, c)
+}
+
 /// Vector Multiply Sum Saturated
 #[inline]
 #[target_feature(enable = "altivec")]
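And a matching sketch for `vec_msum`: with `b` all ones, each u32 lane reduces to the plain sum of four bytes of `a` plus the lane of `c` (values arbitrary):

    unsafe {
        let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
        ));
        let b: vector_unsigned_char = ::mem::transmute(u8x16::new(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        ));
        let c: vector_unsigned_int = ::mem::transmute(u32x4::new(10, 20, 30, 40));
        // Lanes: (0+1+2+3)+10 = 16, (4+5+6+7)+20 = 42, 36, 62.
        let d: vector_unsigned_int = vec_msum(a, b, c);
    }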
@@ -655,6 +790,157 @@ mod tests {
         assert_eq!(d, ::mem::transmute(vec_mradds(a, b, c)));
     }

+    macro_rules! test_vec_mladd {
+        { $name:ident, $sa:ident, $la:ident, $sbc:ident, $lbc:ident, $sd:ident,
+          [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+] } => {
+            #[simd_test(enable = "altivec")]
+            unsafe fn $name() {
+                let a: $la = ::mem::transmute($sa::new($($a),+));
+                let b: $lbc = ::mem::transmute($sbc::new($($b),+));
+                let c: $lbc = ::mem::transmute($sbc::new($($c),+));
+                let d = $sd::new($($d),+);
+
+                assert_eq!(d, ::mem::transmute(vec_mladd(a, b, c)));
+            }
+        }
+    }
+
+    test_vec_mladd! { test_vec_mladd_u16x8_u16x8, u16x8, vector_unsigned_short, u16x8, vector_unsigned_short, u16x8,
+        [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
+    }
+    test_vec_mladd! { test_vec_mladd_u16x8_i16x8, u16x8, vector_unsigned_short, i16x8, vector_signed_short, i16x8,
+        [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
+    }
+    test_vec_mladd! { test_vec_mladd_i16x8_u16x8, i16x8, vector_signed_short, u16x8, vector_unsigned_short, i16x8,
+        [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
+    }
+    test_vec_mladd! { test_vec_mladd_i16x8_i16x8, i16x8, vector_signed_short, i16x8, vector_signed_short, i16x8,
+        [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_msum_unsigned_char() {
+        let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
+            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
+        ));
+        let b: vector_unsigned_char = ::mem::transmute(u8x16::new(
+            255, 255, 255, 255, 255, 255, 255, 255,
+            255, 255, 255, 255, 255, 255, 255, 255,
+        ));
+        let c: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
+        let d = u32x4::new(
+            (0 + 1 + 2 + 3) * 255 + 0,
+            (4 + 5 + 6 + 7) * 255 + 1,
+            (0 + 1 + 2 + 3) * 255 + 2,
+            (4 + 5 + 6 + 7) * 255 + 3,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_msum_signed_char() {
+        let a: vector_signed_char = ::mem::transmute(i8x16::new(
+            0, -1, 2, -3, 1, -1, 1, -1, 0, 1, 2, 3, 4, -5, -6, -7,
+        ));
+        let b: vector_unsigned_char = ::mem::transmute(u8x16::new(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        ));
+        let c: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
+        let d = i32x4::new(
+            (0 - 1 + 2 - 3) + 0,
+            (1 - 1 + 1 - 1) + 1,
+            (0 + 1 + 2 + 3) + 2,
+            (4 - 5 - 6 - 7) + 3,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_msum_unsigned_short() {
+        let a: vector_unsigned_short = ::mem::transmute(u16x8::new(
+            0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256,
+        ));
+        let b: vector_unsigned_short =
+            ::mem::transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
+        let d = u32x4::new(
+            (0 + 1) * 256 * 256 + 0,
+            (2 + 3) * 256 * 256 + 1,
+            (4 + 5) * 256 * 256 + 2,
+            (6 + 7) * 256 * 256 + 3,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
+    }
+
+    #[simd_test(enable = "altivec")]
+    unsafe fn test_vec_msum_signed_short() {
+        let a: vector_signed_short = ::mem::transmute(i16x8::new(
+            0 * 256, -1 * 256, 2 * 256, -3 * 256, 4 * 256, -5 * 256, 6 * 256, -7 * 256,
+        ));
+        let b: vector_signed_short =
+            ::mem::transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
+        let d = i32x4::new(
+            (0 - 1) * 256 * 256 + 0,
+            (2 - 3) * 256 * 256 + 1,
+            (4 - 5) * 256 * 256 + 2,
+            (6 - 7) * 256 * 256 + 3,
+        );
+
+        assert_eq!(d, ::mem::transmute(vec_msum(a, b, c)));
+    }
+
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_msums_unsigned() {
         let a: vector_unsigned_short = ::mem::transmute(u16x8::new(