Skip to content

Commit dd981c0

Browse files
authored
Add popcount Arm intrinsics. (rust-lang#935)
1 parent ece43e9 commit dd981c0

File tree

1 file changed

+125
-0
lines changed
  • crates/core_arch/src/arm/neon

1 file changed

+125
-0
lines changed

crates/core_arch/src/arm/neon/mod.rs

+125
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,13 @@ extern "C" {
195195
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
196196
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")]
197197
fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
198+
199+
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v8i8")]
200+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v8i8")]
201+
fn vcnt_s8_(a: int8x8_t) -> int8x8_t;
202+
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v16i8")]
203+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v16i8")]
204+
fn vcntq_s8_(a: int8x16_t) -> int8x16_t;
198205
}
199206

200207
#[cfg(target_arch = "arm")]
@@ -1863,6 +1870,61 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
18631870
vmaxq_f32_(a, b)
18641871
}
18651872

1873+
/// Population count per byte: each of the 8 result lanes holds the number of set bits in the matching byte of `a`.
1874+
#[inline]
1875+
#[target_feature(enable = "neon")]
1876+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1877+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
1878+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))]
1879+
pub unsafe fn vcnt_s8(a: int8x8_t) -> int8x8_t {
1880+
vcnt_s8_(a) // extern binding linked to `llvm.ctpop.v8i8` on both arm and aarch64
1881+
}
1882+
/// Population count per byte: each of the 16 result lanes holds the number of set bits in the matching byte of `a`.
1883+
#[inline]
1884+
#[target_feature(enable = "neon")]
1885+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1886+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
1887+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))]
1888+
pub unsafe fn vcntq_s8(a: int8x16_t) -> int8x16_t {
1889+
vcntq_s8_(a) // extern binding linked to `llvm.ctpop.v16i8` on both arm and aarch64
1890+
}
1891+
/// Population count per byte: each of the 8 result lanes holds the number of set bits in the matching byte of `a`.
1892+
#[inline]
1893+
#[target_feature(enable = "neon")]
1894+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1895+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
1896+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))]
1897+
pub unsafe fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
1898+
transmute(vcnt_s8_(transmute(a))) // a bit count does not depend on lane signedness, so the `int8x8_t` binding is reused
1899+
}
1900+
/// Population count per byte: each of the 16 result lanes holds the number of set bits in the matching byte of `a`.
1901+
#[inline]
1902+
#[target_feature(enable = "neon")]
1903+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1904+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
1905+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))]
1906+
pub unsafe fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
1907+
transmute(vcntq_s8_(transmute(a))) // a bit count does not depend on lane signedness, so the `int8x16_t` binding is reused
1908+
}
1909+
/// Population count per byte: each of the 8 result lanes holds the number of set bits in the matching byte of `a`.
1910+
#[inline]
1911+
#[target_feature(enable = "neon")]
1912+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1913+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
1914+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))]
1915+
pub unsafe fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
1916+
transmute(vcnt_s8_(transmute(a))) // the polynomial vector goes through the `int8x8_t` binding; only the lane type is reinterpreted
1917+
}
1918+
/// Population count per byte: each of the 16 result lanes holds the number of set bits in the matching byte of `a`.
1919+
#[inline]
1920+
#[target_feature(enable = "neon")]
1921+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1922+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
1923+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))]
1924+
pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
1925+
transmute(vcntq_s8_(transmute(a))) // the polynomial vector goes through the `int8x16_t` binding; only the lane type is reinterpreted
1926+
}
1927+
18661928
#[cfg(test)]
18671929
mod tests {
18681930
use super::*;
@@ -4357,6 +4419,69 @@ mod tests {
43574419
let r: f32x4 = transmute(vmaxq_f32(transmute(a), transmute(b)));
43584420
assert_eq!(r, e);
43594421
}
4422+
#[simd_test(enable = "neon")]
4423+
unsafe fn test_vcnt_s8() {
4424+
let a: i8x8 = transmute(u8x8::new( // bit patterns are written as u8 and reinterpreted as i8 lanes
4425+
0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
4426+
0b00111111,
4427+
));
4428+
let e = i8x8::new(3, 8, 0, 7, 2, 4, 1, 6); // number of set bits in each byte of `a`, in lane order
4429+
let r: i8x8 = transmute(vcnt_s8(transmute(a)));
4430+
assert_eq!(r, e);
4431+
}
4432+
#[simd_test(enable = "neon")]
4433+
unsafe fn test_vcntq_s8() {
4434+
let a: i8x16 = transmute(u8x16::new( // bit patterns are written as u8 and reinterpreted as i8 lanes
4435+
0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
4436+
0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111,
4437+
0b11100000, 0b00010000,
4438+
));
4439+
let e = i8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1); // number of set bits in each byte of `a`, in lane order
4440+
let r: i8x16 = transmute(vcntq_s8(transmute(a)));
4441+
assert_eq!(r, e);
4442+
}
4443+
#[simd_test(enable = "neon")]
4444+
unsafe fn test_vcnt_u8() {
4445+
let a = u8x8::new(
4446+
0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
4447+
0b00111111,
4448+
);
4449+
let e = u8x8::new(3, 8, 0, 7, 2, 4, 1, 6); // number of set bits in each byte of `a`, in lane order
4450+
let r: u8x8 = transmute(vcnt_u8(transmute(a)));
4451+
assert_eq!(r, e);
4452+
}
4453+
#[simd_test(enable = "neon")]
4454+
unsafe fn test_vcntq_u8() {
4455+
let a = u8x16::new(
4456+
0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
4457+
0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111,
4458+
0b11100000, 0b00010000,
4459+
);
4460+
let e = u8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1); // number of set bits in each byte of `a`, in lane order
4461+
let r: u8x16 = transmute(vcntq_u8(transmute(a)));
4462+
assert_eq!(r, e);
4463+
}
4464+
#[simd_test(enable = "neon")]
4465+
unsafe fn test_vcnt_p8() {
4466+
let a = u8x8::new(
4467+
0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
4468+
0b00111111,
4469+
);
4470+
let e = u8x8::new(3, 8, 0, 7, 2, 4, 1, 6); // number of set bits in each byte of `a`, in lane order
4471+
let r: u8x8 = transmute(vcnt_p8(transmute(a))); // `u8x8` is reinterpreted as `poly8x8_t` for the call and back for the comparison
4472+
assert_eq!(r, e);
4473+
}
4474+
#[simd_test(enable = "neon")]
4475+
unsafe fn test_vcntq_p8() {
4476+
let a = u8x16::new(
4477+
0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
4478+
0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111,
4479+
0b11100000, 0b00010000,
4480+
);
4481+
let e = u8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1); // number of set bits in each byte of `a`, in lane order
4482+
let r: u8x16 = transmute(vcntq_p8(transmute(a))); // `u8x16` is reinterpreted as `poly8x16_t` for the call and back for the comparison
4483+
assert_eq!(r, e);
4484+
}
43604485
}
43614486

43624487
#[cfg(test)]

0 commit comments

Comments
 (0)