Skip to content

Commit 4c244fb

Browse files
MaloJaffrealexcrichton
authored andcommitted
Add SSE2 trivial aliases and conversions. (#165)
`_mm_cvtsd_f64`, `_mm_cvtsd_si64x` and `_mm_cvttsd_si64x`. See #40.
1 parent 1cbd309 commit 4c244fb

File tree

1 file changed

+42
-4
lines changed

1 file changed

+42
-4
lines changed

src/x86/sse2.rs

+42-4
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,13 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
17871787
cvtsd2si64(a)
17881788
}
17891789

1790+
/// Alias for [`_mm_cvtsd_si64`](fn._mm_cvtsd_si64.html).
1791+
#[cfg(target_arch = "x86_64")]
1792+
#[inline(always)]
1793+
#[target_feature = "+sse2"]
1794+
#[cfg_attr(test, assert_instr(cvtsd2si))]
1795+
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
1796+
17901797
/// Convert the lower double-precision (64-bit) floating-point element in `b`
17911798
/// to a single-precision (32-bit) floating-point element, store the result in
17921799
/// the lower element of the return value, and copy the upper element from `a`
@@ -1798,6 +1805,14 @@ pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 {
17981805
cvtsd2ss(a, b)
17991806
}
18001807

1808+
/// Return the lower double-precision (64-bit) floating-point element of `a`.
1809+
#[inline(always)]
1810+
#[target_feature = "+sse2"]
1811+
// no particular instruction to test
1812+
pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 {
1813+
a.extract(0)
1814+
}
1815+
18011816
/// Convert the lower single-precision (32-bit) floating-point element in `b`
18021817
/// to a double-precision (64-bit) floating-point element, store the result in
18031818
/// the lower element of the return value, and copy the upper element from `a`
@@ -1837,6 +1852,13 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
18371852
cvttsd2si64(a)
18381853
}
18391854

1855+
/// Alias for [`_mm_cvttsd_si64`](fn._mm_cvttsd_si64.html).
1856+
#[cfg(target_arch = "x86_64")]
1857+
#[inline(always)]
1858+
#[target_feature = "+sse2"]
1859+
#[cfg_attr(test, assert_instr(cvttsd2si))]
1860+
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
1861+
18401862
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
18411863
/// packed 32-bit integers with truncation.
18421864
#[inline(always)]
@@ -3979,8 +4001,14 @@ mod tests {
39794001

39804002
let r = sse2::_mm_cvtsd_si64(f64x2::new(f64::MAX, f64::MIN));
39814003
assert_eq!(r, i64::MIN);
4004+
}
4005+
4006+
#[cfg(target_arch = "x86_64")]
4007+
#[simd_test = "sse2"]
4008+
unsafe fn _mm_cvtsd_si64x() {
4009+
use std::{f64, i64};
39824010

3983-
let r = sse2::_mm_cvtsd_si64(f64x2::new(f64::NAN, f64::NAN));
4011+
let r = sse2::_mm_cvtsd_si64x(f64x2::new(f64::NAN, f64::NAN));
39844012
assert_eq!(r, i64::MIN);
39854013
}
39864014

@@ -4012,6 +4040,12 @@ mod tests {
40124040
);
40134041
}
40144042

4043+
#[simd_test = "sse2"]
4044+
unsafe fn _mm_cvtsd_f64() {
4045+
let r = sse2::_mm_cvtsd_f64(f64x2::new(-1.1, 2.2));
4046+
assert_eq!(r, -1.1);
4047+
}
4048+
40154049
#[simd_test = "sse2"]
40164050
unsafe fn _mm_cvtss_sd() {
40174051
use std::{f32, f64};
@@ -4058,14 +4092,18 @@ mod tests {
40584092
#[cfg(target_arch = "x86_64")]
40594093
#[simd_test = "sse2"]
40604094
unsafe fn _mm_cvttsd_si64() {
4061-
use std::{f64, i64};
4062-
40634095
let a = f64x2::new(-1.1, 2.2);
40644096
let r = sse2::_mm_cvttsd_si64(a);
40654097
assert_eq!(r, -1_i64);
4098+
}
4099+
4100+
#[cfg(target_arch = "x86_64")]
4101+
#[simd_test = "sse2"]
4102+
unsafe fn _mm_cvttsd_si64x() {
4103+
use std::{f64, i64};
40664104

40674105
let a = f64x2::new(f64::NEG_INFINITY, f64::NAN);
4068-
let r = sse2::_mm_cvttsd_si64(a);
4106+
let r = sse2::_mm_cvttsd_si64x(a);
40694107
assert_eq!(r, i64::MIN);
40704108
}
40714109

0 commit comments

Comments
 (0)