@@ -7,6 +7,8 @@ use crate::{
7
7
#[cfg(test)]
8
8
use stdarch_test::assert_instr;
9
9
10
+ use super::avx512f::{vpl, vps};
11
+
10
12
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
11
13
///
12
14
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
@@ -4237,11 +4239,11 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
4237
4239
// Loads packed 16-bit integers from `mem_addr` into a 512-bit vector under
// write mask `k`, starting from `src`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
4238
4240
// `dst` is pre-loaded with `src` and passed as `inout`, so any lane the masked
// move leaves untouched is returned with its `src` value.
let mut dst: __m512i = src;
4239
4241
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
4240
- "vmovdqu16 {2 }{{{1 }}}, [{0}]" ,
4241
- in(reg) mem_addr,
4242
- in(kreg) k,
4243
- inout(zmm_reg) dst,
4244
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4242
+ vpl!( "vmovdqu16 {dst }{{{k }}}") ,
4243
+ p = in(reg) mem_addr,
4244
+ k = in(kreg) k,
4245
+ dst = inout(zmm_reg) dst,
4246
+ options(pure, readonly, nostack)
4245
4247
);
4246
4248
dst
4247
4249
}
@@ -4256,11 +4258,11 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
4256
4258
// Loads packed 16-bit integers from `mem_addr` into a 512-bit vector under
// mask `k` using the `{z}` form of `vmovdqu16`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
4257
4259
// Left uninitialised: `dst` is a pure `out` operand of the asm below.
let mut dst: __m512i;
4258
4260
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
// NOTE(review): `{{z}}` renders as `{z}`, the AVX-512 zero-masking suffix, so
// lanes with a clear mask bit are presumably zeroed — confirm against Intel's docs.
4259
- "vmovdqu16 {2 }{{{1 }}} {{z}}, [{0}]" ,
4260
- in(reg) mem_addr,
4261
- in(kreg) k,
4262
- out(zmm_reg) dst,
4263
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4261
+ vpl!( "vmovdqu16 {dst }{{{k }}} {{z}}") ,
4262
+ p = in(reg) mem_addr,
4263
+ k = in(kreg) k,
4264
+ dst = out(zmm_reg) dst,
4265
+ options(pure, readonly, nostack)
4264
4266
);
4265
4267
dst
4266
4268
}
@@ -4275,11 +4277,11 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
4275
4277
// Loads packed 8-bit integers from `mem_addr` into a 512-bit vector under
// write mask `k` (64 bits wide), starting from `src`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
4276
4278
// `dst` is pre-loaded with `src` and passed as `inout`, so any lane the masked
// move leaves untouched is returned with its `src` value.
let mut dst: __m512i = src;
4277
4279
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
4278
- "vmovdqu8 {2 }{{{1 }}}, [{0}]" ,
4279
- in(reg) mem_addr,
4280
- in(kreg) k,
4281
- inout(zmm_reg) dst,
4282
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4280
+ vpl!( "vmovdqu8 {dst }{{{k }}}") ,
4281
+ p = in(reg) mem_addr,
4282
+ k = in(kreg) k,
4283
+ dst = inout(zmm_reg) dst,
4284
+ options(pure, readonly, nostack)
4283
4285
);
4284
4286
dst
4285
4287
}
@@ -4294,11 +4296,11 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
4294
4296
// Loads packed 8-bit integers from `mem_addr` into a 512-bit vector under
// mask `k` using the `{z}` form of `vmovdqu8`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
4295
4297
// Left uninitialised: `dst` is a pure `out` operand of the asm below.
let mut dst: __m512i;
4296
4298
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
// NOTE(review): `{{z}}` renders as `{z}`, the AVX-512 zero-masking suffix, so
// lanes with a clear mask bit are presumably zeroed — confirm against Intel's docs.
4297
- "vmovdqu8 {2 }{{{1 }}} {{z}}, [{0}]" ,
4298
- in(reg) mem_addr,
4299
- in(kreg) k,
4300
- out(zmm_reg) dst,
4301
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4299
+ vpl!( "vmovdqu8 {dst }{{{k }}} {{z}}") ,
4300
+ p = in(reg) mem_addr,
4301
+ k = in(kreg) k,
4302
+ dst = out(zmm_reg) dst,
4303
+ options(pure, readonly, nostack)
4302
4304
);
4303
4305
dst
4304
4306
}
@@ -4313,11 +4315,11 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
4313
4315
// Loads packed 16-bit integers from `mem_addr` into a 256-bit vector under
// write mask `k`, starting from `src`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
4314
4316
// `dst` is pre-loaded with `src` and passed as `inout`, so any lane the masked
// move leaves untouched is returned with its `src` value.
let mut dst: __m256i = src;
4315
4317
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
4316
- "vmovdqu16 {2 }{{{1 }}}, [{0}]" ,
4317
- in(reg) mem_addr,
4318
- in(kreg) k,
4319
- inout(ymm_reg) dst,
4320
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4318
+ vpl!( "vmovdqu16 {dst }{{{k }}}") ,
4319
+ p = in(reg) mem_addr,
4320
+ k = in(kreg) k,
4321
+ dst = inout(ymm_reg) dst,
4322
+ options(pure, readonly, nostack)
4321
4323
);
4322
4324
dst
4323
4325
}
@@ -4332,11 +4334,11 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
4332
4334
// Loads packed 16-bit integers from `mem_addr` into a 256-bit vector under
// mask `k` using the `{z}` form of `vmovdqu16`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
4333
4335
// Left uninitialised: `dst` is a pure `out` operand of the asm below.
let mut dst: __m256i;
4334
4336
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
// NOTE(review): `{{z}}` renders as `{z}`, the AVX-512 zero-masking suffix, so
// lanes with a clear mask bit are presumably zeroed — confirm against Intel's docs.
4335
- "vmovdqu16 {2 }{{{1 }}} {{z}}, [{0}]" ,
4336
- in(reg) mem_addr,
4337
- in(kreg) k,
4338
- out(ymm_reg) dst,
4339
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4337
+ vpl!( "vmovdqu16 {dst }{{{k }}} {{z}}") ,
4338
+ p = in(reg) mem_addr,
4339
+ k = in(kreg) k,
4340
+ dst = out(ymm_reg) dst,
4341
+ options(pure, readonly, nostack)
4340
4342
);
4341
4343
dst
4342
4344
}
@@ -4351,11 +4353,11 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
4351
4353
// Loads packed 8-bit integers from `mem_addr` into a 256-bit vector under
// write mask `k`, starting from `src`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
4352
4354
// `dst` is pre-loaded with `src` and passed as `inout`, so any lane the masked
// move leaves untouched is returned with its `src` value.
let mut dst: __m256i = src;
4353
4355
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
4354
- "vmovdqu8 {2 }{{{1 }}}, [{0}]" ,
4355
- in(reg) mem_addr,
4356
- in(kreg) k,
4357
- inout(ymm_reg) dst,
4358
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4356
+ vpl!( "vmovdqu8 {dst }{{{k }}}") ,
4357
+ p = in(reg) mem_addr,
4358
+ k = in(kreg) k,
4359
+ dst = inout(ymm_reg) dst,
4360
+ options(pure, readonly, nostack)
4359
4361
);
4360
4362
dst
4361
4363
}
@@ -4370,11 +4372,11 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
4370
4372
// Loads packed 8-bit integers from `mem_addr` into a 256-bit vector under
// mask `k` using the `{z}` form of `vmovdqu8`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
4371
4373
// Left uninitialised: `dst` is a pure `out` operand of the asm below.
let mut dst: __m256i;
4372
4374
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
// NOTE(review): `{{z}}` renders as `{z}`, the AVX-512 zero-masking suffix, so
// lanes with a clear mask bit are presumably zeroed — confirm against Intel's docs.
4373
- "vmovdqu8 {2 }{{{1 }}} {{z}}, [{0}]" ,
4374
- in(reg) mem_addr,
4375
- in(kreg) k,
4376
- out(ymm_reg) dst,
4377
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4375
+ vpl!( "vmovdqu8 {dst }{{{k }}} {{z}}") ,
4376
+ p = in(reg) mem_addr,
4377
+ k = in(kreg) k,
4378
+ dst = out(ymm_reg) dst,
4379
+ options(pure, readonly, nostack)
4378
4380
);
4379
4381
dst
4380
4382
}
@@ -4389,11 +4391,11 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
4389
4391
// Loads packed 16-bit integers from `mem_addr` into a 128-bit vector under
// write mask `k`, starting from `src`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
4390
4392
// `dst` is pre-loaded with `src` and passed as `inout`, so any lane the masked
// move leaves untouched is returned with its `src` value.
let mut dst: __m128i = src;
4391
4393
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
4392
- "vmovdqu16 {2 }{{{1 }}}, [{0}]" ,
4393
- in(reg) mem_addr,
4394
- in(kreg) k,
4395
- inout(xmm_reg) dst,
4396
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4394
+ vpl!( "vmovdqu16 {dst }{{{k }}}") ,
4395
+ p = in(reg) mem_addr,
4396
+ k = in(kreg) k,
4397
+ dst = inout(xmm_reg) dst,
4398
+ options(pure, readonly, nostack)
4397
4399
);
4398
4400
dst
4399
4401
}
@@ -4408,11 +4410,11 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
4408
4410
// Loads packed 16-bit integers from `mem_addr` into a 128-bit vector under
// mask `k` using the `{z}` form of `vmovdqu16`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
4409
4411
// Left uninitialised: `dst` is a pure `out` operand of the asm below.
let mut dst: __m128i;
4410
4412
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
// NOTE(review): `{{z}}` renders as `{z}`, the AVX-512 zero-masking suffix, so
// lanes with a clear mask bit are presumably zeroed — confirm against Intel's docs.
4411
- "vmovdqu16 {2 }{{{1 }}} {{z}}, [{0}]" ,
4412
- in(reg) mem_addr,
4413
- in(kreg) k,
4414
- out(xmm_reg) dst,
4415
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4413
+ vpl!( "vmovdqu16 {dst }{{{k }}} {{z}}") ,
4414
+ p = in(reg) mem_addr,
4415
+ k = in(kreg) k,
4416
+ dst = out(xmm_reg) dst,
4417
+ options(pure, readonly, nostack)
4416
4418
);
4417
4419
dst
4418
4420
}
@@ -4427,11 +4429,11 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
4427
4429
// Loads packed 8-bit integers from `mem_addr` into a 128-bit vector under
// write mask `k`, starting from `src`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
4428
4430
// `dst` is pre-loaded with `src` and passed as `inout`, so any lane the masked
// move leaves untouched is returned with its `src` value.
let mut dst: __m128i = src;
4429
4431
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
4430
- "vmovdqu8 {2 }{{{1 }}}, [{0}]" ,
4431
- in(reg) mem_addr,
4432
- in(kreg) k,
4433
- inout(xmm_reg) dst,
4434
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4432
+ vpl!( "vmovdqu8 {dst }{{{k }}}") ,
4433
+ p = in(reg) mem_addr,
4434
+ k = in(kreg) k,
4435
+ dst = inout(xmm_reg) dst,
4436
+ options(pure, readonly, nostack)
4435
4437
);
4436
4438
dst
4437
4439
}
@@ -4446,11 +4448,11 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
4446
4448
// Loads packed 8-bit integers from `mem_addr` into a 128-bit vector under
// mask `k` using the `{z}` form of `vmovdqu8`, and returns the result.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
4447
4449
// Left uninitialised: `dst` is a pure `out` operand of the asm below.
let mut dst: __m128i;
4448
4450
asm!(
// Removed form: positional operands, memory operand `[{0}]` spelled out in the template.
// NOTE(review): `{{z}}` renders as `{z}`, the AVX-512 zero-masking suffix, so
// lanes with a clear mask bit are presumably zeroed — confirm against Intel's docs.
4449
- "vmovdqu8 {2 }{{{1 }}} {{z}}, [{0}]" ,
4450
- in(reg) mem_addr,
4451
- in(kreg) k,
4452
- out(xmm_reg) dst,
4453
- options(pure, readonly, nostack)
// Added form: named operands; the address is bound to `p` and the template goes through `vpl!`.
// NOTE(review): `vpl!` comes from `super::avx512f` and is not shown here —
// presumably it appends the `[{p}]` memory operand; confirm at its definition.
4451
+ vpl!( "vmovdqu8 {dst }{{{k }}} {{z}}") ,
4452
+ p = in(reg) mem_addr,
4453
+ k = in(kreg) k,
4454
+ dst = out(xmm_reg) dst,
4455
+ options(pure, readonly, nostack)
4454
4456
);
4455
4457
dst
4456
4458
}
@@ -4463,11 +4465,11 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
4463
4465
#[target_feature(enable = "avx512f,avx512bw")]
4464
4466
// Stores packed 16-bit integers from the 512-bit vector `a` to memory at
// `mem_addr` under write mask `mask`, via a masked `vmovdqu16`. Returns nothing.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
4465
4467
asm!(
// Removed form: positional operands with the `[{0}]` destination written in the template.
// Only `nostack` is declared; the asm writes memory, so it is neither `pure` nor `readonly`.
4466
- "vmovdqu16 [{0}]{{{1 }}}, {2}" ,
4467
- in(reg) mem_addr,
4468
- in(kreg) mask,
4469
- in(zmm_reg) a,
4470
- options(nostack)
// Added form: named operands; `vps!` receives the mnemonic and the operand tail separately.
// NOTE(review): `vps!` comes from `super::avx512f` and is not shown here —
// presumably it inserts the `[{p}]` destination between the two pieces; confirm.
4468
+ vps!( "vmovdqu16", "{{{mask }}}, {a}") ,
4469
+ p = in(reg) mem_addr,
4470
+ mask = in(kreg) mask,
4471
+ a = in(zmm_reg) a,
4472
+ options(nostack)
4471
4473
);
4472
4474
}
4473
4475
@@ -4479,11 +4481,11 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
4479
4481
#[target_feature(enable = "avx512f,avx512bw")]
4480
4482
// Stores packed 8-bit integers from the 512-bit vector `a` to memory at
// `mem_addr` under write mask `mask`, via a masked `vmovdqu8`. Returns nothing.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
4481
4483
asm!(
// Removed form: positional operands with the `[{0}]` destination written in the template.
// Only `nostack` is declared; the asm writes memory, so it is neither `pure` nor `readonly`.
4482
- "vmovdqu8 [{0}]{{{1 }}}, {2}" ,
4483
- in(reg) mem_addr,
4484
- in(kreg) mask,
4485
- in(zmm_reg) a,
4486
- options(nostack)
// Added form: named operands; `vps!` receives the mnemonic and the operand tail separately.
// NOTE(review): `vps!` comes from `super::avx512f` and is not shown here —
// presumably it inserts the `[{p}]` destination between the two pieces; confirm.
4484
+ vps!( "vmovdqu8", "{{{mask }}}, {a}") ,
4485
+ p = in(reg) mem_addr,
4486
+ mask = in(kreg) mask,
4487
+ a = in(zmm_reg) a,
4488
+ options(nostack)
4487
4489
);
4488
4490
}
4489
4491
@@ -4495,11 +4497,11 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
4495
4497
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4496
4498
// Stores packed 16-bit integers from the 256-bit vector `a` to memory at
// `mem_addr` under write mask `mask`, via a masked `vmovdqu16`. Returns nothing.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
4497
4499
asm!(
// Removed form: positional operands with the `[{0}]` destination written in the template.
// Only `nostack` is declared; the asm writes memory, so it is neither `pure` nor `readonly`.
4498
- "vmovdqu16 [{0}]{{{1 }}}, {2}" ,
4499
- in(reg) mem_addr,
4500
- in(kreg) mask,
4501
- in(ymm_reg) a,
4502
- options(nostack)
// Added form: named operands; `vps!` receives the mnemonic and the operand tail separately.
// NOTE(review): `vps!` comes from `super::avx512f` and is not shown here —
// presumably it inserts the `[{p}]` destination between the two pieces; confirm.
4500
+ vps!( "vmovdqu16", "{{{mask }}}, {a}") ,
4501
+ p = in(reg) mem_addr,
4502
+ mask = in(kreg) mask,
4503
+ a = in(ymm_reg) a,
4504
+ options(nostack)
4503
4505
);
4504
4506
}
4505
4507
@@ -4511,11 +4513,11 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
4511
4513
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4512
4514
// Stores packed 8-bit integers from the 256-bit vector `a` to memory at
// `mem_addr` under write mask `mask`, via a masked `vmovdqu8`. Returns nothing.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
4513
4515
asm!(
// Removed form: positional operands with the `[{0}]` destination written in the template.
// Only `nostack` is declared; the asm writes memory, so it is neither `pure` nor `readonly`.
4514
- "vmovdqu8 [{0}]{{{1 }}}, {2}" ,
4515
- in(reg) mem_addr,
4516
- in(kreg) mask,
4517
- in(ymm_reg) a,
4518
- options(nostack)
// Added form: named operands; `vps!` receives the mnemonic and the operand tail separately.
// NOTE(review): `vps!` comes from `super::avx512f` and is not shown here —
// presumably it inserts the `[{p}]` destination between the two pieces; confirm.
4516
+ vps!( "vmovdqu8", "{{{mask }}}, {a}") ,
4517
+ p = in(reg) mem_addr,
4518
+ mask = in(kreg) mask,
4519
+ a = in(ymm_reg) a,
4520
+ options(nostack)
4519
4521
);
4520
4522
}
4521
4523
@@ -4527,11 +4529,11 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
4527
4529
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4528
4530
// Stores packed 16-bit integers from the 128-bit vector `a` to memory at
// `mem_addr` under write mask `mask`, via a masked `vmovdqu16`. Returns nothing.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
4529
4531
asm!(
// Removed form: positional operands with the `[{0}]` destination written in the template.
// Only `nostack` is declared; the asm writes memory, so it is neither `pure` nor `readonly`.
4530
- "vmovdqu16 [{0}]{{{1 }}}, {2}" ,
4531
- in(reg) mem_addr,
4532
- in(kreg) mask,
4533
- in(xmm_reg) a,
4534
- options(nostack)
// Added form: named operands; `vps!` receives the mnemonic and the operand tail separately.
// NOTE(review): `vps!` comes from `super::avx512f` and is not shown here —
// presumably it inserts the `[{p}]` destination between the two pieces; confirm.
4532
+ vps!( "vmovdqu16", "{{{mask }}}, {a}") ,
4533
+ p = in(reg) mem_addr,
4534
+ mask = in(kreg) mask,
4535
+ a = in(xmm_reg) a,
4536
+ options(nostack)
4535
4537
);
4536
4538
}
4537
4539
@@ -4543,11 +4545,11 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
4543
4545
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4544
4546
// Stores packed 8-bit integers from the 128-bit vector `a` to memory at
// `mem_addr` under write mask `mask`, via a masked `vmovdqu8`. Returns nothing.
// NOTE(review): this span is a rendered diff — bare numbers are old/new line
// numbers, `-` lines are the removed asm operands, `+` lines their replacement.
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
4545
4547
asm!(
// Removed form: positional operands with the `[{0}]` destination written in the template.
// Only `nostack` is declared; the asm writes memory, so it is neither `pure` nor `readonly`.
4546
- "vmovdqu8 [{0}]{{{1 }}}, {2}" ,
4547
- in(reg) mem_addr,
4548
- in(kreg) mask,
4549
- in(xmm_reg) a,
4550
- options(nostack)
// Added form: named operands; `vps!` receives the mnemonic and the operand tail separately.
// NOTE(review): `vps!` comes from `super::avx512f` and is not shown here —
// presumably it inserts the `[{p}]` destination between the two pieces; confirm.
4548
+ vps!( "vmovdqu8", "{{{mask }}}, {a}") ,
4549
+ p = in(reg) mem_addr,
4550
+ mask = in(kreg) mask,
4551
+ a = in(xmm_reg) a,
4552
+ options(nostack)
4551
4553
);
4552
4554
}
4553
4555
0 commit comments