@@ -54,8 +54,32 @@ extern "C" {
54
54
#[ link_name = "llvm.arm.sasx" ]
55
55
fn arm_sasx ( a : i32 , b : i32 ) -> i32 ;
56
56
57
- #[ cfg_attr ( not ( target_feature = "mclass" ) , link_name = "llvm.arm.sel" ) ]
57
+ #[ link_name = "llvm.arm.sel" ]
58
58
fn arm_sel ( a : i32 , b : i32 ) -> i32 ;
59
+
60
+ #[ link_name = "llvm.arm.shadd8" ]
61
+ fn arm_shadd8 ( a : i32 , b : i32 ) -> i32 ;
62
+
63
+ #[ link_name = "llvm.arm.shadd16" ]
64
+ fn arm_shadd16 ( a : i32 , b : i32 ) -> i32 ;
65
+
66
+ #[ link_name = "llvm.arm.shsub8" ]
67
+ fn arm_shsub8 ( a : i32 , b : i32 ) -> i32 ;
68
+
69
+ #[ link_name = "llvm.arm.shsub16" ]
70
+ fn arm_shsub16 ( a : i32 , b : i32 ) -> i32 ;
71
+
72
+ #[ link_name = "llvm.arm.smuad" ]
73
+ fn arm_smuad ( a : i32 , b : i32 ) -> i32 ;
74
+
75
+ #[ link_name = "llvm.arm.smuadx" ]
76
+ fn arm_smuadx ( a : i32 , b : i32 ) -> i32 ;
77
+
78
+ #[ link_name = "llvm.arm.smusd" ]
79
+ fn arm_smusd ( a : i32 , b : i32 ) -> i32 ;
80
+
81
+ #[ link_name = "llvm.arm.smusdx" ]
82
+ fn arm_smusdx ( a : i32 , b : i32 ) -> i32 ;
59
83
}
60
84
61
85
/// Signed saturating addition
@@ -201,6 +225,109 @@ pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
201
225
dsp_call ! ( arm_sel, a, b)
202
226
}
203
227
228
+ /// Signed halving parallel byte-wise addition.
229
+ ///
230
+ /// Returns the 8-bit signed equivalent of
231
+ ///
232
+ /// res\[0\] = (a\[0\] + b\[0\]) / 2
233
+ /// res\[1\] = (a\[1\] + b\[1\]) / 2
234
+ /// res\[2\] = (a\[2\] + b\[2\]) / 2
235
+ /// res\[3\] = (a\[3\] + b\[3\]) / 2
236
+ #[ inline]
237
+ #[ cfg_attr( test, assert_instr( shadd8) ) ]
238
+ pub unsafe fn shadd8 ( a : int8x4_t , b : int8x4_t ) -> int8x4_t {
239
+ dsp_call ! ( arm_shadd8, a, b)
240
+ }
241
+
242
+ /// Signed halving parallel halfword-wise addition.
243
+ ///
244
+ /// Returns the 16-bit signed equivalent of
245
+ ///
246
+ /// res\[0\] = (a\[0\] + b\[0\]) / 2
247
+ /// res\[1\] = (a\[1\] + b\[1\]) / 2
248
+ #[ inline]
249
+ #[ cfg_attr( test, assert_instr( shadd16) ) ]
250
+ pub unsafe fn shadd16 ( a : int16x2_t , b : int16x2_t ) -> int16x2_t {
251
+ dsp_call ! ( arm_shadd16, a, b)
252
+ }
253
+
254
+ /// Signed halving parallel byte-wise subtraction.
255
+ ///
256
+ /// Returns the 8-bit signed equivalent of
257
+ ///
258
+ /// res\[0\] = (a\[0\] - b\[0\]) / 2
259
+ /// res\[1\] = (a\[1\] - b\[1\]) / 2
260
+ /// res\[2\] = (a\[2\] - b\[2\]) / 2
261
+ /// res\[3\] = (a\[3\] - b\[3\]) / 2
262
+ #[ inline]
263
+ #[ cfg_attr( test, assert_instr( shsub8) ) ]
264
+ pub unsafe fn shsub8 ( a : int8x4_t , b : int8x4_t ) -> int8x4_t {
265
+ dsp_call ! ( arm_shsub8, a, b)
266
+ }
267
+
268
+ /// Signed halving parallel halfword-wise subtraction.
269
+ ///
270
+ /// Returns the 16-bit signed equivalent of
271
+ ///
272
+ /// res\[0\] = (a\[0\] - b\[0\]) / 2
273
+ /// res\[1\] = (a\[1\] - b\[1\]) / 2
274
+ #[ inline]
275
+ #[ cfg_attr( test, assert_instr( shsub16) ) ]
276
+ pub unsafe fn shsub16 ( a : int16x2_t , b : int16x2_t ) -> int16x2_t {
277
+ dsp_call ! ( arm_shsub16, a, b)
278
+ }
279
+
280
+ /// Signed Dual Multiply Add.
281
+ ///
282
+ /// Returns the equivalent of
283
+ ///
284
+ /// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
285
+ ///
286
+ /// and sets the Q flag if overflow occurs on the addition.
287
+ #[ cfg_attr( test, assert_instr( smuad) ) ]
288
+ pub unsafe fn smuad ( a : int16x2_t , b : int16x2_t ) -> i32 {
289
+ arm_smuad ( :: mem:: transmute ( a) , :: mem:: transmute ( b) )
290
+ }
291
+
292
+ /// Signed Dual Multiply Add Reversed.
293
+ ///
294
+ /// Returns the equivalent of
295
+ ///
296
+ /// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
297
+ ///
298
+ /// and sets the Q flag if overflow occurs on the addition.
299
+ #[ inline]
300
+ #[ cfg_attr( test, assert_instr( smuadx) ) ]
301
+ pub unsafe fn smuadx ( a : int16x2_t , b : int16x2_t ) -> i32 {
302
+ arm_smuadx ( :: mem:: transmute ( a) , :: mem:: transmute ( b) )
303
+ }
304
+
305
+ /// Signed Dual Multiply Subtract.
306
+ ///
307
+ /// Returns the equivalent of
308
+ ///
309
+ /// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
310
+ ///
311
+ /// and sets the Q flag if overflow occurs on the addition.
312
+ #[ inline]
313
+ #[ cfg_attr( test, assert_instr( smusd) ) ]
314
+ pub unsafe fn smusd ( a : int16x2_t , b : int16x2_t ) -> i32 {
315
+ arm_smusd ( :: mem:: transmute ( a) , :: mem:: transmute ( b) )
316
+ }
317
+
318
+ /// Signed Dual Multiply Subtract Reversed.
319
+ ///
320
+ /// Returns the equivalent of
321
+ ///
322
+ /// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
323
+ ///
324
+ /// and sets the Q flag if overflow occurs on the addition.
325
+ #[ inline]
326
+ #[ cfg_attr( test, assert_instr( smusdx) ) ]
327
+ pub unsafe fn smusdx ( a : int16x2_t , b : int16x2_t ) -> i32 {
328
+ arm_smusdx ( :: mem:: transmute ( a) , :: mem:: transmute ( b) )
329
+ }
330
+
204
331
#[ cfg( test) ]
205
332
mod tests {
206
333
use coresimd:: arm:: * ;
@@ -337,4 +464,88 @@ mod tests {
337
464
assert_eq ! ( r, c) ;
338
465
}
339
466
}
467
+
468
#[test]
fn shadd8() {
    unsafe {
        let lhs = i8x4::new(1, 2, 3, 4);
        let rhs = i8x4::new(5, 4, 3, 2);
        // Every lane pair sums to 6, so each halved lane is 3.
        let expected = i8x4::new(3, 3, 3, 3);
        let actual: i8x4 = dsp_call!(dsp::shadd8, lhs, rhs);
        assert_eq!(actual, expected);
    }
}
478
+
479
#[test]
fn shadd16() {
    unsafe {
        let lhs = i16x2::new(1, 2);
        let rhs = i16x2::new(5, 4);
        // Both lane pairs sum to 6, so each halved lane is 3.
        let expected = i16x2::new(3, 3);
        let actual: i16x2 = dsp_call!(dsp::shadd16, lhs, rhs);
        assert_eq!(actual, expected);
    }
}
489
+
490
#[test]
fn shsub8() {
    unsafe {
        let lhs = i8x4::new(1, 2, 3, 4);
        let rhs = i8x4::new(5, 4, 3, 2);
        // Lane differences are -4, -2, 0 and 2; halved: -2, -1, 0 and 1.
        let expected = i8x4::new(-2, -1, 0, 1);
        let actual: i8x4 = dsp_call!(dsp::shsub8, lhs, rhs);
        assert_eq!(actual, expected);
    }
}
500
+
501
#[test]
fn shsub16() {
    unsafe {
        let lhs = i16x2::new(1, 2);
        let rhs = i16x2::new(5, 4);
        // Lane differences are -4 and -2; halved: -2 and -1.
        let expected = i16x2::new(-2, -1);
        let actual: i16x2 = dsp_call!(dsp::shsub16, lhs, rhs);
        assert_eq!(actual, expected);
    }
}
511
+
512
#[test]
fn smuad() {
    unsafe {
        let lhs = i16x2::new(1, 2);
        let rhs = i16x2::new(5, 4);
        // 1 * 5 + 2 * 4 = 13
        let sum = dsp::smuad(::mem::transmute(lhs), ::mem::transmute(rhs));
        assert_eq!(sum, 13);
    }
}
521
+
522
#[test]
fn smuadx() {
    unsafe {
        let lhs = i16x2::new(1, 2);
        let rhs = i16x2::new(5, 4);
        // Crossed lanes: 1 * 4 + 2 * 5 = 14
        let sum = dsp::smuadx(::mem::transmute(lhs), ::mem::transmute(rhs));
        assert_eq!(sum, 14);
    }
}
531
+
532
#[test]
fn smusd() {
    unsafe {
        let lhs = i16x2::new(1, 2);
        let rhs = i16x2::new(5, 4);
        // 1 * 5 - 2 * 4 = -3
        let diff = dsp::smusd(::mem::transmute(lhs), ::mem::transmute(rhs));
        assert_eq!(diff, -3);
    }
}
541
+
542
#[test]
fn smusdx() {
    unsafe {
        let lhs = i16x2::new(1, 2);
        let rhs = i16x2::new(5, 4);
        // Crossed lanes: 1 * 4 - 2 * 5 = -6
        let diff = dsp::smusdx(::mem::transmute(lhs), ::mem::transmute(rhs));
        assert_eq!(diff, -6);
    }
}
340
551
}
0 commit comments