Skip to content

Commit 2a6847a

Browse files
authored
support for neon instructions vabal_* and vabal_high_* (rust-lang#1097)
1 parent 655bfc8 commit 2a6847a

File tree

3 files changed

+414
-0
lines changed

3 files changed

+414
-0
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

+129
Original file line number | Diff line number | Diff line change
@@ -2727,6 +2727,75 @@ pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
27272727
simd_shuffle2(a, b, [1, 3])
27282728
}
27292729

2730+
/// Unsigned Absolute difference and Accumulate Long
2731+
#[inline]
2732+
#[target_feature(enable = "neon")]
2733+
#[cfg_attr(test, assert_instr(uabal))]
2734+
pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
2735+
let d: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2736+
let e: uint8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
2737+
let f: uint8x8_t = vabd_u8(d, e);
2738+
simd_add(a, simd_cast(f))
2739+
}
2740+
2741+
/// Unsigned Absolute difference and Accumulate Long
2742+
#[inline]
2743+
#[target_feature(enable = "neon")]
2744+
#[cfg_attr(test, assert_instr(uabal))]
2745+
pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
2746+
let d: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2747+
let e: uint16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]);
2748+
let f: uint16x4_t = vabd_u16(d, e);
2749+
simd_add(a, simd_cast(f))
2750+
}
2751+
2752+
/// Unsigned Absolute difference and Accumulate Long
2753+
#[inline]
2754+
#[target_feature(enable = "neon")]
2755+
#[cfg_attr(test, assert_instr(uabal))]
2756+
pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
2757+
let d: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
2758+
let e: uint32x2_t = simd_shuffle2(c, c, [2, 3]);
2759+
let f: uint32x2_t = vabd_u32(d, e);
2760+
simd_add(a, simd_cast(f))
2761+
}
2762+
2763+
/// Signed Absolute difference and Accumulate Long
2764+
#[inline]
2765+
#[target_feature(enable = "neon")]
2766+
#[cfg_attr(test, assert_instr(sabal))]
2767+
pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
2768+
let d: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2769+
let e: int8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
2770+
let f: int8x8_t = vabd_s8(d, e);
2771+
let f: uint8x8_t = simd_cast(f);
2772+
simd_add(a, simd_cast(f))
2773+
}
2774+
2775+
/// Signed Absolute difference and Accumulate Long
2776+
#[inline]
2777+
#[target_feature(enable = "neon")]
2778+
#[cfg_attr(test, assert_instr(sabal))]
2779+
pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
2780+
let d: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2781+
let e: int16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]);
2782+
let f: int16x4_t = vabd_s16(d, e);
2783+
let f: uint16x4_t = simd_cast(f);
2784+
simd_add(a, simd_cast(f))
2785+
}
2786+
2787+
/// Signed Absolute difference and Accumulate Long
2788+
#[inline]
2789+
#[target_feature(enable = "neon")]
2790+
#[cfg_attr(test, assert_instr(sabal))]
2791+
pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
2792+
let d: int32x2_t = simd_shuffle2(b, b, [2, 3]);
2793+
let e: int32x2_t = simd_shuffle2(c, c, [2, 3]);
2794+
let f: int32x2_t = vabd_s32(d, e);
2795+
let f: uint32x2_t = simd_cast(f);
2796+
simd_add(a, simd_cast(f))
2797+
}
2798+
27302799
#[cfg(test)]
27312800
mod test {
27322801
use super::*;
@@ -5318,4 +5387,64 @@ mod test {
53185387
let r: f64x2 = transmute(vzip2q_f64(transmute(a), transmute(b)));
53195388
assert_eq!(r, e);
53205389
}
5390+
5391+
#[simd_test(enable = "neon")]
5392+
unsafe fn test_vabal_high_u8() {
5393+
let a: u16x8 = u16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
5394+
let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5395+
let c: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12);
5396+
let e: u16x8 = u16x8::new(20, 20, 20, 20, 20, 20, 20, 20);
5397+
let r: u16x8 = transmute(vabal_high_u8(transmute(a), transmute(b), transmute(c)));
5398+
assert_eq!(r, e);
5399+
}
5400+
5401+
#[simd_test(enable = "neon")]
5402+
unsafe fn test_vabal_high_u16() {
5403+
let a: u32x4 = u32x4::new(9, 10, 11, 12);
5404+
let b: u16x8 = u16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
5405+
let c: u16x8 = u16x8::new(10, 10, 10, 10, 20, 0, 2, 4);
5406+
let e: u32x4 = u32x4::new(20, 20, 20, 20);
5407+
let r: u32x4 = transmute(vabal_high_u16(transmute(a), transmute(b), transmute(c)));
5408+
assert_eq!(r, e);
5409+
}
5410+
5411+
#[simd_test(enable = "neon")]
5412+
unsafe fn test_vabal_high_u32() {
5413+
let a: u64x2 = u64x2::new(15, 16);
5414+
let b: u32x4 = u32x4::new(1, 2, 15, 16);
5415+
let c: u32x4 = u32x4::new(10, 10, 10, 12);
5416+
let e: u64x2 = u64x2::new(20, 20);
5417+
let r: u64x2 = transmute(vabal_high_u32(transmute(a), transmute(b), transmute(c)));
5418+
assert_eq!(r, e);
5419+
}
5420+
5421+
#[simd_test(enable = "neon")]
5422+
unsafe fn test_vabal_high_s8() {
5423+
let a: i16x8 = i16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
5424+
let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5425+
let c: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12);
5426+
let e: i16x8 = i16x8::new(20, 20, 20, 20, 20, 20, 20, 20);
5427+
let r: i16x8 = transmute(vabal_high_s8(transmute(a), transmute(b), transmute(c)));
5428+
assert_eq!(r, e);
5429+
}
5430+
5431+
#[simd_test(enable = "neon")]
5432+
unsafe fn test_vabal_high_s16() {
5433+
let a: i32x4 = i32x4::new(9, 10, 11, 12);
5434+
let b: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
5435+
let c: i16x8 = i16x8::new(10, 10, 10, 10, 20, 0, 2, 4);
5436+
let e: i32x4 = i32x4::new(20, 20, 20, 20);
5437+
let r: i32x4 = transmute(vabal_high_s16(transmute(a), transmute(b), transmute(c)));
5438+
assert_eq!(r, e);
5439+
}
5440+
5441+
#[simd_test(enable = "neon")]
5442+
unsafe fn test_vabal_high_s32() {
5443+
let a: i64x2 = i64x2::new(15, 16);
5444+
let b: i32x4 = i32x4::new(1, 2, 15, 16);
5445+
let c: i32x4 = i32x4::new(10, 10, 10, 12);
5446+
let e: i64x2 = i64x2::new(20, 20);
5447+
let r: i64x2 = transmute(vabal_high_s32(transmute(a), transmute(b), transmute(c)));
5448+
assert_eq!(r, e);
5449+
}
53215450
}

crates/core_arch/src/arm/neon/generated.rs

+129
Original file line number | Diff line number | Diff line change
@@ -4413,6 +4413,75 @@ pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
44134413
vrecpeq_f32_(a)
44144414
}
44154415

4416+
/// Unsigned Absolute difference and Accumulate Long
4417+
#[inline]
4418+
#[target_feature(enable = "neon")]
4419+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4420+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))]
4421+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
4422+
pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
4423+
let d: uint8x8_t = vabd_u8(b, c);
4424+
simd_add(a, simd_cast(d))
4425+
}
4426+
4427+
/// Unsigned Absolute difference and Accumulate Long
4428+
#[inline]
4429+
#[target_feature(enable = "neon")]
4430+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4431+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))]
4432+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
4433+
pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
4434+
let d: uint16x4_t = vabd_u16(b, c);
4435+
simd_add(a, simd_cast(d))
4436+
}
4437+
4438+
/// Unsigned Absolute difference and Accumulate Long
4439+
#[inline]
4440+
#[target_feature(enable = "neon")]
4441+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4442+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))]
4443+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
4444+
pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
4445+
let d: uint32x2_t = vabd_u32(b, c);
4446+
simd_add(a, simd_cast(d))
4447+
}
4448+
4449+
/// Signed Absolute difference and Accumulate Long
4450+
#[inline]
4451+
#[target_feature(enable = "neon")]
4452+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4453+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))]
4454+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
4455+
pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
4456+
let d: int8x8_t = vabd_s8(b, c);
4457+
let e: uint8x8_t = simd_cast(d);
4458+
simd_add(a, simd_cast(e))
4459+
}
4460+
4461+
/// Signed Absolute difference and Accumulate Long
4462+
#[inline]
4463+
#[target_feature(enable = "neon")]
4464+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4465+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))]
4466+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
4467+
pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
4468+
let d: int16x4_t = vabd_s16(b, c);
4469+
let e: uint16x4_t = simd_cast(d);
4470+
simd_add(a, simd_cast(e))
4471+
}
4472+
4473+
/// Signed Absolute difference and Accumulate Long
4474+
#[inline]
4475+
#[target_feature(enable = "neon")]
4476+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4477+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))]
4478+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
4479+
pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
4480+
let d: int32x2_t = vabd_s32(b, c);
4481+
let e: uint32x2_t = simd_cast(d);
4482+
simd_add(a, simd_cast(e))
4483+
}
4484+
44164485
#[cfg(test)]
44174486
#[allow(overflowing_literals)]
44184487
mod test {
@@ -7843,4 +7912,64 @@ mod test {
78437912
let r: f32x4 = transmute(vrecpeq_f32(transmute(a)));
78447913
assert_eq!(r, e);
78457914
}
7915+
7916+
#[simd_test(enable = "neon")]
7917+
unsafe fn test_vabal_u8() {
7918+
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
7919+
let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
7920+
let c: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10);
7921+
let e: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
7922+
let r: u16x8 = transmute(vabal_u8(transmute(a), transmute(b), transmute(c)));
7923+
assert_eq!(r, e);
7924+
}
7925+
7926+
#[simd_test(enable = "neon")]
7927+
unsafe fn test_vabal_u16() {
7928+
let a: u32x4 = u32x4::new(1, 2, 3, 4);
7929+
let b: u16x4 = u16x4::new(1, 2, 3, 4);
7930+
let c: u16x4 = u16x4::new(10, 10, 10, 10);
7931+
let e: u32x4 = u32x4::new(10, 10, 10, 10);
7932+
let r: u32x4 = transmute(vabal_u16(transmute(a), transmute(b), transmute(c)));
7933+
assert_eq!(r, e);
7934+
}
7935+
7936+
#[simd_test(enable = "neon")]
7937+
unsafe fn test_vabal_u32() {
7938+
let a: u64x2 = u64x2::new(1, 2);
7939+
let b: u32x2 = u32x2::new(1, 2);
7940+
let c: u32x2 = u32x2::new(10, 10);
7941+
let e: u64x2 = u64x2::new(10, 10);
7942+
let r: u64x2 = transmute(vabal_u32(transmute(a), transmute(b), transmute(c)));
7943+
assert_eq!(r, e);
7944+
}
7945+
7946+
#[simd_test(enable = "neon")]
7947+
unsafe fn test_vabal_s8() {
7948+
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
7949+
let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
7950+
let c: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10);
7951+
let e: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
7952+
let r: i16x8 = transmute(vabal_s8(transmute(a), transmute(b), transmute(c)));
7953+
assert_eq!(r, e);
7954+
}
7955+
7956+
#[simd_test(enable = "neon")]
7957+
unsafe fn test_vabal_s16() {
7958+
let a: i32x4 = i32x4::new(1, 2, 3, 4);
7959+
let b: i16x4 = i16x4::new(1, 2, 3, 4);
7960+
let c: i16x4 = i16x4::new(10, 10, 10, 10);
7961+
let e: i32x4 = i32x4::new(10, 10, 10, 10);
7962+
let r: i32x4 = transmute(vabal_s16(transmute(a), transmute(b), transmute(c)));
7963+
assert_eq!(r, e);
7964+
}
7965+
7966+
#[simd_test(enable = "neon")]
7967+
unsafe fn test_vabal_s32() {
7968+
let a: i64x2 = i64x2::new(1, 2);
7969+
let b: i32x2 = i32x2::new(1, 2);
7970+
let c: i32x2 = i32x2::new(10, 10);
7971+
let e: i64x2 = i64x2::new(10, 10);
7972+
let r: i64x2 = transmute(vabal_s32(transmute(a), transmute(b), transmute(c)));
7973+
assert_eq!(r, e);
7974+
}
78467975
}

0 commit comments

Comments
 (0)