Skip to content

Commit 1f00630

Browse files
dlrobertson authored and BurntSushi committed
[x86] Add _mm_cvtps_epi32 (cvtps2dq) function
_mm_cvtepi32_ps has been implemented, but _mm_cvtps_epi32 is missing. Use the implementation of _mm_cvtepi32_ps as a guide for implementing _mm_cvtps_epi32.
1 parent 077a736 commit 1f00630

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

src/x86/sse2.rs

+18
Original file line number | Diff line number | Diff line change
@@ -691,6 +691,15 @@ pub unsafe fn _mm_cvtepi32_ps(a: i32x4) -> f32x4 {
691691
cvtdq2ps(a)
692692
}
693693

694+
/// Convert the packed single-precision (32-bit) floating-point elements in `a`
695+
/// to packed 32-bit integers.
///
/// This is a thin wrapper that forwards `a` unchanged to the `cvtps2dq`
/// intrinsic and returns its result.
///
/// NOTE(review): how fractional, NaN, or out-of-range lanes are converted is
/// decided by the underlying instruction and is not visible here; confirm
/// against the Intel reference before relying on a particular behaviour.
696+
#[inline(always)]
697+
#[target_feature = "+sse2"]
698+
#[cfg_attr(test, assert_instr(cvtps2dq))]
699+
pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 {
700+
cvtps2dq(a)
701+
}
702+
694703
/// Return a vector whose lowest element is `a` and all higher elements are
695704
/// `0`.
696705
#[inline(always)]
@@ -1819,6 +1828,8 @@ extern {
18191828
fn psrlq(a: i64x2, count: i64x2) -> i64x2;
18201829
#[link_name = "llvm.x86.sse2.cvtdq2ps"]
18211830
fn cvtdq2ps(a: i32x4) -> f32x4;
1831+
#[link_name = "llvm.x86.sse2.cvtps2dq"]
1832+
fn cvtps2dq(a: f32x4) -> i32x4;
18221833
#[link_name = "llvm.x86.sse2.maskmov.dqu"]
18231834
fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
18241835
#[link_name = "llvm.x86.sse2.packsswb.128"]
@@ -2565,6 +2576,13 @@ mod tests {
25652576
assert_eq!(r, f32x4::new(1.0, 2.0, 3.0, 4.0));
25662577
}
25672578

2579+
#[simd_test = "sse2"]
2580+
unsafe fn _mm_cvtps_epi32() {
2581+
let a = f32x4::new(1.0, 2.0, 3.0, 4.0); // whole-number lanes only; fractional inputs are not covered
2582+
let r = sse2::_mm_cvtps_epi32(a);
2583+
assert_eq!(r, i32x4::new(1, 2, 3, 4)); // each lane converts to the matching integer, in the same order
2584+
}
2585+
25682586
#[simd_test = "sse2"]
25692587
unsafe fn _mm_cvtsi32_si128() {
25702588
let r = sse2::_mm_cvtsi32_si128(5);

0 commit comments

Comments
 (0)