@@ -1284,6 +1284,50 @@ pub unsafe fn _mm256_insert_epi64(a: i64x4, i: i64, index: i32) -> i64x4 {
     c.replace(index as u32 & 3, i)
 }

+/// Load 256-bits (composed of 4 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` must be aligned on a 32-byte boundary or a
+/// general-protection exception may be generated.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
+pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> f64x4 {
+    *(mem_addr as *const f64x4)
+}
+
+/// Store 256-bits (composed of 4 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` must be aligned on a 32-byte boundary or a
+/// general-protection exception may be generated.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
+pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: f64x4) {
+    *(mem_addr as *mut f64x4) = a;
+}
+
+/// Load 256-bits (composed of 8 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` must be aligned on a 32-byte boundary or a
+/// general-protection exception may be generated.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vmovaps))]
+pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> f32x8 {
+    *(mem_addr as *const f32x8)
+}
+
+/// Store 256-bits (composed of 8 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` must be aligned on a 32-byte boundary or a
+/// general-protection exception may be generated.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vmovaps))]
+pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: f32x8) {
+    *(mem_addr as *mut f32x8) = a;
+}
+
 /// Load 256-bits (composed of 4 packed double-precision (64-bit)
 /// floating-point elements) from memory into result.
 /// `mem_addr` does not need to be aligned on any particular boundary.
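
The aligned variants above require a 32-byte-aligned `mem_addr`, which a plain
`&[f64]` does not guarantee. A minimal sketch of one way a caller could satisfy
that contract, using a hypothetical `Aligned32` wrapper (the wrapper is
illustrative only, not part of this crate):

// Hypothetical wrapper, not part of this crate: `#[repr(align(32))]`
// forces the 32-byte alignment that `_mm256_load_pd` requires, so the
// aligned load cannot raise a general-protection exception.
#[repr(align(32))]
struct Aligned32([f64; 4]);

unsafe fn load_doubles(buf: &Aligned32) -> f64x4 {
    // `buf.0.as_ptr()` inherits the wrapper's 32-byte alignment.
    avx::_mm256_load_pd(buf.0.as_ptr())
}

When alignment cannot be guaranteed, the `_mm256_loadu_*`/`_mm256_storeu_*`
variants below accept any address instead.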
@@ -1336,6 +1380,26 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: f32x8) {
     storeups256(mem_addr, a);
 }

+/// Load 256-bits of integer data from memory into result.
+/// `mem_addr` must be aligned on a 32-byte boundary or a
+/// general-protection exception may be generated.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
+pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
+    *mem_addr
+}
+
+/// Store 256-bits of integer data from `a` into memory.
+/// `mem_addr` must be aligned on a 32-byte boundary or a
+/// general-protection exception may be generated.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
+pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
+    *mem_addr = a;
+}
+
 /// Load 256-bits of integer data from memory into result.
 /// `mem_addr` does not need to be aligned on any particular boundary.
 #[inline(always)]
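
The integer load carries the same contract. A sketch of how the aligned and
unaligned variants relate, assuming `_mm256_loadu_si256` (defined just below)
takes the same `*const __m256i` pointer as the aligned form; in real code the
caller normally knows the alignment statically and calls one intrinsic
directly rather than branching:

// Sketch only: pick the aligned or unaligned load based on the
// pointer's run-time alignment.
unsafe fn load_i256(p: *const __m256i) -> __m256i {
    if p as usize % 32 == 0 {
        avx::_mm256_load_si256(p) // 32-byte aligned: aligned load is safe
    } else {
        avx::_mm256_loadu_si256(p) // unknown alignment: unaligned load
    }
}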
@@ -3241,6 +3305,40 @@ mod tests {
         assert_eq!(r, e);
     }

+    #[simd_test = "avx"]
+    unsafe fn _mm256_load_pd() {
+        let a = avx::_mm256_setr_pd(1., 2., 3., 4.);
+        let p = &a as *const _ as *const f64;
+        let r = avx::_mm256_load_pd(p);
+        let e = f64x4::new(1., 2., 3., 4.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_store_pd() {
+        let a = avx::_mm256_setr_pd(1., 2., 3., 4.);
+        let mut r = avx::_mm256_undefined_pd();
+        avx::_mm256_store_pd(&mut r as *mut _ as *mut f64, a);
+        assert_eq!(r, a);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_load_ps() {
+        let a = avx::_mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
+        let p = &a as *const _ as *const f32;
+        let r = avx::_mm256_load_ps(p);
+        let e = f32x8::new(4., 3., 2., 5., 8., 9., 64., 50.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_store_ps() {
+        let a = avx::_mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
+        let mut r = avx::_mm256_undefined_ps();
+        avx::_mm256_store_ps(&mut r as *mut _ as *mut f32, a);
+        assert_eq!(r, a);
+    }
+
     #[simd_test = "avx"]
     unsafe fn _mm256_loadu_pd() {
         let a = &[1.0f64, 2., 3., 4.];
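
These tests rely on the vector types themselves being 32-byte aligned, which
is why a pointer cast from `&a` is a valid argument for the aligned
intrinsics. The same pattern gives a simple round-trip check, sketched here
using only functions from this file:

// Round-trip sketch: store `a` into aligned scratch, then load it back.
// `_mm256_undefined_ps()` serves purely as scratch whose type is assumed
// to provide the 32-byte alignment (the tests above assume the same).
unsafe fn roundtrip_ps(a: f32x8) -> f32x8 {
    let mut buf = avx::_mm256_undefined_ps();
    avx::_mm256_store_ps(&mut buf as *mut _ as *mut f32, a);
    avx::_mm256_load_ps(&buf as *const _ as *const f32)
}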
@@ -3275,6 +3373,23 @@ mod tests {
         assert_eq!(r, a);
     }

+    #[simd_test = "avx"]
+    unsafe fn _mm256_load_si256() {
+        let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4));
+        let p = &a as *const _;
+        let r = avx::_mm256_load_si256(p);
+        let e = i64x4::new(1, 2, 3, 4);
+        assert_eq!(r, __m256i::from(e));
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_store_si256() {
+        let a = __m256i::from(avx::_mm256_setr_epi64x(1, 2, 3, 4));
+        let mut r = avx::_mm256_undefined_si256();
+        avx::_mm256_store_si256(&mut r as *mut _, a);
+        assert_eq!(r, a);
+    }
+
     #[simd_test = "avx"]
     unsafe fn _mm256_loadu_si256() {
         let a = __m256i::from(i64x4::new(1, 2, 3, 4));