Skip to content

Commit 071f8f9

Browse files
gwenn authored and alexcrichton committed
Avx (#172)
* avx: _mm256_load_pd, _mm256_store_pd, _mm256_load_ps, _mm256_store_ps * avx: _mm256_load_si256, _mm256_store_si256
1 parent 4c244fb commit 071f8f9

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed

src/x86/avx.rs

+115
Original file line numberDiff line numberDiff line change
@@ -1284,6 +1284,50 @@ pub unsafe fn _mm256_insert_epi64(a: i64x4, i: i64, index: i32) -> i64x4 {
12841284
c.replace(index as u32 & 3, i)
12851285
}
12861286

1287+
/// Load 256-bits (composed of 4 packed double-precision (64-bit)
1288+
/// floating-point elements) from memory into result.
1289+
/// `mem_addr` must be aligned on a 32-byte boundary or a
1290+
/// general-protection exception may be generated.
1291+
#[inline(always)]
1292+
#[target_feature = "+avx"]
1293+
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1294+
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> f64x4 {
1295+
*(mem_addr as *const f64x4)
1296+
}
1297+
1298+
/// Store 256-bits (composed of 4 packed double-precision (64-bit)
1299+
/// floating-point elements) from `a` into memory.
1300+
/// `mem_addr` must be aligned on a 32-byte boundary or a
1301+
/// general-protection exception may be generated.
1302+
#[inline(always)]
1303+
#[target_feature = "+avx"]
1304+
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1305+
pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: f64x4) {
1306+
*(mem_addr as *mut f64x4) = a;
1307+
}
1308+
1309+
/// Load 256-bits (composed of 8 packed single-precision (32-bit)
1310+
/// floating-point elements) from memory into result.
1311+
/// `mem_addr` must be aligned on a 32-byte boundary or a
1312+
/// general-protection exception may be generated.
1313+
#[inline(always)]
1314+
#[target_feature = "+avx"]
1315+
#[cfg_attr(test, assert_instr(vmovaps))]
1316+
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> f32x8 {
1317+
*(mem_addr as *const f32x8)
1318+
}
1319+
1320+
/// Store 256-bits (composed of 8 packed single-precision (32-bit)
1321+
/// floating-point elements) from `a` into memory.
1322+
/// `mem_addr` must be aligned on a 32-byte boundary or a
1323+
/// general-protection exception may be generated.
1324+
#[inline(always)]
1325+
#[target_feature = "+avx"]
1326+
#[cfg_attr(test, assert_instr(vmovaps))]
1327+
pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: f32x8) {
1328+
*(mem_addr as *mut f32x8) = a;
1329+
}
1330+
12871331
/// Load 256-bits (composed of 4 packed double-precision (64-bit)
12881332
/// floating-point elements) from memory into result.
12891333
/// `mem_addr` does not need to be aligned on any particular boundary.
@@ -1336,6 +1380,26 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: f32x8) {
13361380
storeups256(mem_addr, a);
13371381
}
13381382

1383+
/// Load 256-bits of integer data from memory into result.
1384+
/// `mem_addr` must be aligned on a 32-byte boundary or a
1385+
/// general-protection exception may be generated.
1386+
#[inline(always)]
1387+
#[target_feature = "+avx"]
1388+
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1389+
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
1390+
*mem_addr
1391+
}
1392+
1393+
/// Store 256-bits of integer data from `a` into memory.
1394+
/// `mem_addr` must be aligned on a 32-byte boundary or a
1395+
/// general-protection exception may be generated.
1396+
#[inline(always)]
1397+
#[target_feature = "+avx"]
1398+
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1399+
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
1400+
*mem_addr = a;
1401+
}
1402+
13391403
/// Load 256-bits of integer data from memory into result.
13401404
/// `mem_addr` does not need to be aligned on any particular boundary.
13411405
#[inline(always)]
@@ -3241,6 +3305,40 @@ mod tests {
32413305
assert_eq!(r, e);
32423306
}
32433307

3308+
#[simd_test = "avx"]
3309+
unsafe fn _mm256_load_pd() {
3310+
let a = avx::_mm256_setr_pd(1., 2., 3., 4.);
3311+
let p = &a as *const _ as *const f64;
3312+
let r = avx::_mm256_load_pd(p);
3313+
let e = f64x4::new(1., 2., 3., 4.);
3314+
assert_eq!(r, e);
3315+
}
3316+
3317+
#[simd_test = "avx"]
3318+
unsafe fn _mm256_store_pd() {
3319+
let a = avx::_mm256_setr_pd(1., 2., 3., 4.);
3320+
let mut r = avx::_mm256_undefined_pd();
3321+
avx::_mm256_store_pd(&mut r as *mut _ as *mut f64, a);
3322+
assert_eq!(r, a);
3323+
}
3324+
3325+
#[simd_test = "avx"]
3326+
unsafe fn _mm256_load_ps() {
3327+
let a = avx::_mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3328+
let p = &a as *const _ as *const f32;
3329+
let r = avx::_mm256_load_ps(p);
3330+
let e = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.);
3331+
assert_eq!(r, e);
3332+
}
3333+
3334+
#[simd_test = "avx"]
3335+
unsafe fn _mm256_store_ps() {
3336+
let a = avx::_mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3337+
let mut r = avx::_mm256_undefined_ps();
3338+
avx::_mm256_store_ps(&mut r as *mut _ as *mut f32, a);
3339+
assert_eq!(r, a);
3340+
}
3341+
32443342
#[simd_test = "avx"]
32453343
unsafe fn _mm256_loadu_pd() {
32463344
let a = &[1.0f64, 2., 3., 4.];
@@ -3275,6 +3373,23 @@ mod tests {
32753373
assert_eq!(r, a);
32763374
}
32773375

3376+
#[simd_test = "avx"]
3377+
unsafe fn _mm256_load_si256() {
3378+
let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4));
3379+
let p = &a as *const _;
3380+
let r = avx::_mm256_load_si256(p);
3381+
let e = i64x4::new(1, 2, 3, 4);
3382+
assert_eq!(r, __m256i::from(e));
3383+
}
3384+
3385+
#[simd_test = "avx"]
3386+
unsafe fn _mm256_store_si256() {
3387+
let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4));
3388+
let mut r = avx::_mm256_undefined_si256();
3389+
avx::_mm256_store_si256(&mut r as *mut _, a);
3390+
assert_eq!(r, a);
3391+
}
3392+
32783393
#[simd_test = "avx"]
32793394
unsafe fn _mm256_loadu_si256() {
32803395
let a = __m256i::from(i64x4::new(1, 2, 3, 4));

0 commit comments

Comments
 (0)