Skip to content

Commit 0117f10

Browse files
authored
support s64|u64 for neon instruction vqadd and vqsub (rust-lang#1090)
1 parent 6e90aaf commit 0117f10

File tree

2 files changed

+204
-4
lines changed

2 files changed

+204
-4
lines changed

crates/core_arch/src/arm/neon/generated.rs

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2357,6 +2357,38 @@ pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
23572357
vqsubq_u32_(a, b)
23582358
}
23592359

2360+
/// Saturating subtract
///
/// Unsigned 64-bit form operating on `uint64x1_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means the result clamps at 0 instead of
/// wrapping on underflow — confirm against the Arm intrinsics reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqsub.u64` on arm, `uqsub` on aarch64.
2361+
#[inline]
2362+
#[target_feature(enable = "neon")]
2363+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2364+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
2365+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
2366+
pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
2367+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
2368+
extern "C" {
2369+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v1i64")]
2370+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")]
2371+
fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
2372+
}
2373+
vqsub_u64_(a, b)
2374+
}
2375+
2376+
/// Saturating subtract
///
/// Unsigned 64-bit form operating on `uint64x2_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means each lane clamps at 0 instead of
/// wrapping on underflow — confirm against the Arm intrinsics reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqsub.u64` on arm, `uqsub` on aarch64.
2377+
#[inline]
2378+
#[target_feature(enable = "neon")]
2379+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2380+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
2381+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
2382+
pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
2383+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
2384+
extern "C" {
2385+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v2i64")]
2386+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")]
2387+
fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
2388+
}
2389+
vqsubq_u64_(a, b)
2390+
}
2391+
23602392
/// Saturating subtract
23612393
#[inline]
23622394
#[target_feature(enable = "neon")]
@@ -2453,6 +2485,38 @@ pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
24532485
vqsubq_s32_(a, b)
24542486
}
24552487

2488+
/// Saturating subtract
///
/// Signed 64-bit form operating on `int64x1_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means the result clamps to the `i64`
/// range instead of wrapping on overflow — confirm against the Arm intrinsics
/// reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqsub.s64` on arm, `sqsub` on aarch64.
2489+
#[inline]
2490+
#[target_feature(enable = "neon")]
2491+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2492+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
2493+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
2494+
pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
2495+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
2496+
extern "C" {
2497+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v1i64")]
2498+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")]
2499+
fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
2500+
}
2501+
vqsub_s64_(a, b)
2502+
}
2503+
2504+
/// Saturating subtract
///
/// Signed 64-bit form operating on `int64x2_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means each lane clamps to the `i64`
/// range instead of wrapping on overflow — confirm against the Arm intrinsics
/// reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqsub.s64` on arm, `sqsub` on aarch64.
2505+
#[inline]
2506+
#[target_feature(enable = "neon")]
2507+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
2508+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
2509+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
2510+
pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
2511+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
2512+
extern "C" {
2513+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v2i64")]
2514+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")]
2515+
fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
2516+
}
2517+
vqsubq_s64_(a, b)
2518+
}
2519+
24562520
/// Halving add
24572521
#[inline]
24582522
#[target_feature(enable = "neon")]
@@ -2933,6 +2997,38 @@ pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
29332997
vqaddq_u32_(a, b)
29342998
}
29352999

3000+
/// Saturating add
///
/// Unsigned 64-bit form operating on `uint64x1_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means the result clamps at `u64::MAX`
/// instead of wrapping on overflow — confirm against the Arm intrinsics reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqadd.u64` on arm, `uqadd` on aarch64.
3001+
#[inline]
3002+
#[target_feature(enable = "neon")]
3003+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3004+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
3005+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
3006+
pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
3007+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
3008+
extern "C" {
3009+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v1i64")]
3010+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")]
3011+
fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
3012+
}
3013+
vqadd_u64_(a, b)
3014+
}
3015+
3016+
/// Saturating add
///
/// Unsigned 64-bit form operating on `uint64x2_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means each lane clamps at `u64::MAX`
/// instead of wrapping on overflow — confirm against the Arm intrinsics reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqadd.u64` on arm, `uqadd` on aarch64.
3017+
#[inline]
3018+
#[target_feature(enable = "neon")]
3019+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3020+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
3021+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
3022+
pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
3023+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
3024+
extern "C" {
3025+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v2i64")]
3026+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")]
3027+
fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
3028+
}
3029+
vqaddq_u64_(a, b)
3030+
}
3031+
29363032
/// Saturating add
29373033
#[inline]
29383034
#[target_feature(enable = "neon")]
@@ -3029,6 +3125,38 @@ pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
30293125
vqaddq_s32_(a, b)
30303126
}
30313127

3128+
/// Saturating add
///
/// Signed 64-bit form operating on `int64x1_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means the result clamps to the `i64`
/// range instead of wrapping on overflow — confirm against the Arm intrinsics
/// reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqadd.s64` on arm, `sqadd` on aarch64.
3129+
#[inline]
3130+
#[target_feature(enable = "neon")]
3131+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3132+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
3133+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
3134+
pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
3135+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
3136+
extern "C" {
3137+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v1i64")]
3138+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")]
3139+
fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
3140+
}
3141+
vqadd_s64_(a, b)
3142+
}
3143+
3144+
/// Saturating add
///
/// Signed 64-bit form operating on `int64x2_t`: forwards `a` and `b` to the
/// LLVM NEON intrinsic bound below and returns its result unchanged.
/// NOTE(review): "saturating" presumably means each lane clamps to the `i64`
/// range instead of wrapping on overflow — confirm against the Arm intrinsics
/// reference.
///
/// In test builds the `assert_instr` attributes name the instruction expected for
/// this function: `vqadd.s64` on arm, `sqadd` on aarch64.
3145+
#[inline]
3146+
#[target_feature(enable = "neon")]
3147+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3148+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
3149+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
3150+
pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
3151+
// Local binding to the LLVM intrinsic; the symbol is selected per target
// architecture through the `link_name` attributes.
#[allow(improper_ctypes)]
3152+
extern "C" {
3153+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v2i64")]
3154+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")]
3155+
fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
3156+
}
3157+
vqaddq_s64_(a, b)
3158+
}
3159+
30323160
/// Multiply
30333161
#[inline]
30343162
#[target_feature(enable = "neon")]
@@ -6171,6 +6299,24 @@ mod test {
61716299
assert_eq!(r, e);
61726300
}
61736301

6302+
// Checks `vqsub_u64` on a single-lane input: 42 - 1 must yield 41.
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary subtraction path is exercised.
#[simd_test(enable = "neon")]
6303+
unsafe fn test_vqsub_u64() {
6304+
let a: u64x1 = u64x1::new(42);
6305+
let b: u64x1 = u64x1::new(1);
6306+
let e: u64x1 = u64x1::new(41);
6307+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b)));
6308+
assert_eq!(r, e);
6309+
}
6310+
6311+
// Checks `vqsubq_u64` lane by lane: (42, 42) - (1, 2) must yield (41, 40).
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary subtraction path is exercised.
#[simd_test(enable = "neon")]
6312+
unsafe fn test_vqsubq_u64() {
6313+
let a: u64x2 = u64x2::new(42, 42);
6314+
let b: u64x2 = u64x2::new(1, 2);
6315+
let e: u64x2 = u64x2::new(41, 40);
6316+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b)));
6317+
assert_eq!(r, e);
6318+
}
6319+
61746320
#[simd_test(enable = "neon")]
61756321
unsafe fn test_vqsub_s8() {
61766322
let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
@@ -6225,6 +6371,24 @@ mod test {
62256371
assert_eq!(r, e);
62266372
}
62276373

6374+
// Checks `vqsub_s64` on a single-lane input: 42 - 1 must yield 41.
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary subtraction path is exercised.
#[simd_test(enable = "neon")]
6375+
unsafe fn test_vqsub_s64() {
6376+
let a: i64x1 = i64x1::new(42);
6377+
let b: i64x1 = i64x1::new(1);
6378+
let e: i64x1 = i64x1::new(41);
6379+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b)));
6380+
assert_eq!(r, e);
6381+
}
6382+
6383+
// Checks `vqsubq_s64` lane by lane: (42, 42) - (1, 2) must yield (41, 40).
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary subtraction path is exercised.
#[simd_test(enable = "neon")]
6384+
unsafe fn test_vqsubq_s64() {
6385+
let a: i64x2 = i64x2::new(42, 42);
6386+
let b: i64x2 = i64x2::new(1, 2);
6387+
let e: i64x2 = i64x2::new(41, 40);
6388+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b)));
6389+
assert_eq!(r, e);
6390+
}
6391+
62286392
#[simd_test(enable = "neon")]
62296393
unsafe fn test_vhadd_u8() {
62306394
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
@@ -6495,6 +6659,24 @@ mod test {
64956659
assert_eq!(r, e);
64966660
}
64976661

6662+
// Checks `vqadd_u64` on a single-lane input: 42 + 1 must yield 43.
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary addition path is exercised.
#[simd_test(enable = "neon")]
6663+
unsafe fn test_vqadd_u64() {
6664+
let a: u64x1 = u64x1::new(42);
6665+
let b: u64x1 = u64x1::new(1);
6666+
let e: u64x1 = u64x1::new(43);
6667+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b)));
6668+
assert_eq!(r, e);
6669+
}
6670+
6671+
// Checks `vqaddq_u64` lane by lane: (42, 42) + (1, 2) must yield (43, 44).
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary addition path is exercised.
#[simd_test(enable = "neon")]
6672+
unsafe fn test_vqaddq_u64() {
6673+
let a: u64x2 = u64x2::new(42, 42);
6674+
let b: u64x2 = u64x2::new(1, 2);
6675+
let e: u64x2 = u64x2::new(43, 44);
6676+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b)));
6677+
assert_eq!(r, e);
6678+
}
6679+
64986680
#[simd_test(enable = "neon")]
64996681
unsafe fn test_vqadd_s8() {
65006682
let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
@@ -6549,6 +6731,24 @@ mod test {
65496731
assert_eq!(r, e);
65506732
}
65516733

6734+
// Checks `vqadd_s64` on a single-lane input: 42 + 1 must yield 43.
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary addition path is exercised.
#[simd_test(enable = "neon")]
6735+
unsafe fn test_vqadd_s64() {
6736+
let a: i64x1 = i64x1::new(42);
6737+
let b: i64x1 = i64x1::new(1);
6738+
let e: i64x1 = i64x1::new(43);
6739+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: i64x1 = transmute(vqadd_s64(transmute(a), transmute(b)));
6740+
assert_eq!(r, e);
6741+
}
6742+
6743+
// Checks `vqaddq_s64` lane by lane: (42, 42) + (1, 2) must yield (43, 44).
// NOTE(review): these operands never reach the saturation boundary, so only the
// ordinary addition path is exercised.
#[simd_test(enable = "neon")]
6744+
unsafe fn test_vqaddq_s64() {
6745+
let a: i64x2 = i64x2::new(42, 42);
6746+
let b: i64x2 = i64x2::new(1, 2);
6747+
let e: i64x2 = i64x2::new(43, 44);
6748+
// Inputs and result are converted between the test vector type and the
// intrinsic's argument/return types via `transmute`.
let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b)));
6749+
assert_eq!(r, e);
6750+
}
6751+
65526752
#[simd_test(enable = "neon")]
65536753
unsafe fn test_vmul_s8() {
65546754
let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);

crates/stdarch-gen/neon.spec

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -820,13 +820,13 @@ arm = vqsub.s
820820
aarch64 = uqsub
821821
link-arm = vqsubu._EXT_
822822
link-aarch64 = uqsub._EXT_
823-
generate uint*_t
823+
generate uint*_t, uint64x*_t
824824

825825
arm = vqsub.s
826826
aarch64 = sqsub
827827
link-arm = vqsubs._EXT_
828828
link-aarch64 = sqsub._EXT_
829-
generate int*_t
829+
generate int*_t, int64x*_t
830830

831831
/// Halving add
832832
name = vhadd
@@ -876,13 +876,13 @@ arm = vqadd.s
876876
aarch64 = uqadd
877877
link-arm = vqaddu._EXT_
878878
link-aarch64 = uqadd._EXT_
879-
generate uint*_t
879+
generate uint*_t, uint64x*_t
880880

881881
arm = vqadd.s
882882
aarch64 = sqadd
883883
link-arm = vqadds._EXT_
884884
link-aarch64 = sqadd._EXT_
885-
generate int*_t
885+
generate int*_t, int64x*_t
886886

887887
/// Multiply
888888
name = vmul

0 commit comments

Comments
 (0)