Skip to content

Commit b70ae88

Browse files
authored
Fix avx512f build on x86-32; fix avx512gfni test fail (rust-lang#1264)
1 parent f4513d5 commit b70ae88

File tree

3 files changed

+484
-451
lines changed

3 files changed

+484
-451
lines changed

crates/core_arch/src/x86/avx512bw.rs

+92-90
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use crate::{
77
#[cfg(test)]
88
use stdarch_test::assert_instr;
99

10+
use super::avx512f::{vpl, vps};
11+
1012
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
1113
///
1214
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
@@ -4237,11 +4239,11 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
42374239
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
42384240
let mut dst: __m512i = src;
42394241
asm!(
4240-
"vmovdqu16 {2}{{{1}}}, [{0}]",
4241-
in(reg) mem_addr,
4242-
in(kreg) k,
4243-
inout(zmm_reg) dst,
4244-
options(pure, readonly, nostack)
4242+
vpl!("vmovdqu16 {dst}{{{k}}}"),
4243+
p = in(reg) mem_addr,
4244+
k = in(kreg) k,
4245+
dst = inout(zmm_reg) dst,
4246+
options(pure, readonly, nostack)
42454247
);
42464248
dst
42474249
}
@@ -4256,11 +4258,11 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
42564258
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
42574259
let mut dst: __m512i;
42584260
asm!(
4259-
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
4260-
in(reg) mem_addr,
4261-
in(kreg) k,
4262-
out(zmm_reg) dst,
4263-
options(pure, readonly, nostack)
4261+
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
4262+
p = in(reg) mem_addr,
4263+
k = in(kreg) k,
4264+
dst = out(zmm_reg) dst,
4265+
options(pure, readonly, nostack)
42644266
);
42654267
dst
42664268
}
@@ -4275,11 +4277,11 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
42754277
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
42764278
let mut dst: __m512i = src;
42774279
asm!(
4278-
"vmovdqu8 {2}{{{1}}}, [{0}]",
4279-
in(reg) mem_addr,
4280-
in(kreg) k,
4281-
inout(zmm_reg) dst,
4282-
options(pure, readonly, nostack)
4280+
vpl!("vmovdqu8 {dst}{{{k}}}"),
4281+
p = in(reg) mem_addr,
4282+
k = in(kreg) k,
4283+
dst = inout(zmm_reg) dst,
4284+
options(pure, readonly, nostack)
42834285
);
42844286
dst
42854287
}
@@ -4294,11 +4296,11 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
42944296
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
42954297
let mut dst: __m512i;
42964298
asm!(
4297-
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
4298-
in(reg) mem_addr,
4299-
in(kreg) k,
4300-
out(zmm_reg) dst,
4301-
options(pure, readonly, nostack)
4299+
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
4300+
p = in(reg) mem_addr,
4301+
k = in(kreg) k,
4302+
dst = out(zmm_reg) dst,
4303+
options(pure, readonly, nostack)
43024304
);
43034305
dst
43044306
}
@@ -4313,11 +4315,11 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
43134315
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
43144316
let mut dst: __m256i = src;
43154317
asm!(
4316-
"vmovdqu16 {2}{{{1}}}, [{0}]",
4317-
in(reg) mem_addr,
4318-
in(kreg) k,
4319-
inout(ymm_reg) dst,
4320-
options(pure, readonly, nostack)
4318+
vpl!("vmovdqu16 {dst}{{{k}}}"),
4319+
p = in(reg) mem_addr,
4320+
k = in(kreg) k,
4321+
dst = inout(ymm_reg) dst,
4322+
options(pure, readonly, nostack)
43214323
);
43224324
dst
43234325
}
@@ -4332,11 +4334,11 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
43324334
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
43334335
let mut dst: __m256i;
43344336
asm!(
4335-
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
4336-
in(reg) mem_addr,
4337-
in(kreg) k,
4338-
out(ymm_reg) dst,
4339-
options(pure, readonly, nostack)
4337+
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
4338+
p = in(reg) mem_addr,
4339+
k = in(kreg) k,
4340+
dst = out(ymm_reg) dst,
4341+
options(pure, readonly, nostack)
43404342
);
43414343
dst
43424344
}
@@ -4351,11 +4353,11 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
43514353
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
43524354
let mut dst: __m256i = src;
43534355
asm!(
4354-
"vmovdqu8 {2}{{{1}}}, [{0}]",
4355-
in(reg) mem_addr,
4356-
in(kreg) k,
4357-
inout(ymm_reg) dst,
4358-
options(pure, readonly, nostack)
4356+
vpl!("vmovdqu8 {dst}{{{k}}}"),
4357+
p = in(reg) mem_addr,
4358+
k = in(kreg) k,
4359+
dst = inout(ymm_reg) dst,
4360+
options(pure, readonly, nostack)
43594361
);
43604362
dst
43614363
}
@@ -4370,11 +4372,11 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
43704372
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
43714373
let mut dst: __m256i;
43724374
asm!(
4373-
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
4374-
in(reg) mem_addr,
4375-
in(kreg) k,
4376-
out(ymm_reg) dst,
4377-
options(pure, readonly, nostack)
4375+
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
4376+
p = in(reg) mem_addr,
4377+
k = in(kreg) k,
4378+
dst = out(ymm_reg) dst,
4379+
options(pure, readonly, nostack)
43784380
);
43794381
dst
43804382
}
@@ -4389,11 +4391,11 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
43894391
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
43904392
let mut dst: __m128i = src;
43914393
asm!(
4392-
"vmovdqu16 {2}{{{1}}}, [{0}]",
4393-
in(reg) mem_addr,
4394-
in(kreg) k,
4395-
inout(xmm_reg) dst,
4396-
options(pure, readonly, nostack)
4394+
vpl!("vmovdqu16 {dst}{{{k}}}"),
4395+
p = in(reg) mem_addr,
4396+
k = in(kreg) k,
4397+
dst = inout(xmm_reg) dst,
4398+
options(pure, readonly, nostack)
43974399
);
43984400
dst
43994401
}
@@ -4408,11 +4410,11 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
44084410
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
44094411
let mut dst: __m128i;
44104412
asm!(
4411-
"vmovdqu16 {2}{{{1}}} {{z}}, [{0}]",
4412-
in(reg) mem_addr,
4413-
in(kreg) k,
4414-
out(xmm_reg) dst,
4415-
options(pure, readonly, nostack)
4413+
vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
4414+
p = in(reg) mem_addr,
4415+
k = in(kreg) k,
4416+
dst = out(xmm_reg) dst,
4417+
options(pure, readonly, nostack)
44164418
);
44174419
dst
44184420
}
@@ -4427,11 +4429,11 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
44274429
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
44284430
let mut dst: __m128i = src;
44294431
asm!(
4430-
"vmovdqu8 {2}{{{1}}}, [{0}]",
4431-
in(reg) mem_addr,
4432-
in(kreg) k,
4433-
inout(xmm_reg) dst,
4434-
options(pure, readonly, nostack)
4432+
vpl!("vmovdqu8 {dst}{{{k}}}"),
4433+
p = in(reg) mem_addr,
4434+
k = in(kreg) k,
4435+
dst = inout(xmm_reg) dst,
4436+
options(pure, readonly, nostack)
44354437
);
44364438
dst
44374439
}
@@ -4446,11 +4448,11 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
44464448
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
44474449
let mut dst: __m128i;
44484450
asm!(
4449-
"vmovdqu8 {2}{{{1}}} {{z}}, [{0}]",
4450-
in(reg) mem_addr,
4451-
in(kreg) k,
4452-
out(xmm_reg) dst,
4453-
options(pure, readonly, nostack)
4451+
vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
4452+
p = in(reg) mem_addr,
4453+
k = in(kreg) k,
4454+
dst = out(xmm_reg) dst,
4455+
options(pure, readonly, nostack)
44544456
);
44554457
dst
44564458
}
@@ -4463,11 +4465,11 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
44634465
#[target_feature(enable = "avx512f,avx512bw")]
44644466
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
44654467
asm!(
4466-
"vmovdqu16 [{0}]{{{1}}}, {2}",
4467-
in(reg) mem_addr,
4468-
in(kreg) mask,
4469-
in(zmm_reg) a,
4470-
options(nostack)
4468+
vps!("vmovdqu16", "{{{mask}}}, {a}"),
4469+
p = in(reg) mem_addr,
4470+
mask = in(kreg) mask,
4471+
a = in(zmm_reg) a,
4472+
options(nostack)
44714473
);
44724474
}
44734475

@@ -4479,11 +4481,11 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
44794481
#[target_feature(enable = "avx512f,avx512bw")]
44804482
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
44814483
asm!(
4482-
"vmovdqu8 [{0}]{{{1}}}, {2}",
4483-
in(reg) mem_addr,
4484-
in(kreg) mask,
4485-
in(zmm_reg) a,
4486-
options(nostack)
4484+
vps!("vmovdqu8", "{{{mask}}}, {a}"),
4485+
p = in(reg) mem_addr,
4486+
mask = in(kreg) mask,
4487+
a = in(zmm_reg) a,
4488+
options(nostack)
44874489
);
44884490
}
44894491

@@ -4495,11 +4497,11 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
44954497
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
44964498
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
44974499
asm!(
4498-
"vmovdqu16 [{0}]{{{1}}}, {2}",
4499-
in(reg) mem_addr,
4500-
in(kreg) mask,
4501-
in(ymm_reg) a,
4502-
options(nostack)
4500+
vps!("vmovdqu16", "{{{mask}}}, {a}"),
4501+
p = in(reg) mem_addr,
4502+
mask = in(kreg) mask,
4503+
a = in(ymm_reg) a,
4504+
options(nostack)
45034505
);
45044506
}
45054507

@@ -4511,11 +4513,11 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
45114513
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
45124514
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
45134515
asm!(
4514-
"vmovdqu8 [{0}]{{{1}}}, {2}",
4515-
in(reg) mem_addr,
4516-
in(kreg) mask,
4517-
in(ymm_reg) a,
4518-
options(nostack)
4516+
vps!("vmovdqu8", "{{{mask}}}, {a}"),
4517+
p = in(reg) mem_addr,
4518+
mask = in(kreg) mask,
4519+
a = in(ymm_reg) a,
4520+
options(nostack)
45194521
);
45204522
}
45214523

@@ -4527,11 +4529,11 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
45274529
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
45284530
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
45294531
asm!(
4530-
"vmovdqu16 [{0}]{{{1}}}, {2}",
4531-
in(reg) mem_addr,
4532-
in(kreg) mask,
4533-
in(xmm_reg) a,
4534-
options(nostack)
4532+
vps!("vmovdqu16", "{{{mask}}}, {a}"),
4533+
p = in(reg) mem_addr,
4534+
mask = in(kreg) mask,
4535+
a = in(xmm_reg) a,
4536+
options(nostack)
45354537
);
45364538
}
45374539

@@ -4543,11 +4545,11 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
45434545
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
45444546
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
45454547
asm!(
4546-
"vmovdqu8 [{0}]{{{1}}}, {2}",
4547-
in(reg) mem_addr,
4548-
in(kreg) mask,
4549-
in(xmm_reg) a,
4550-
options(nostack)
4548+
vps!("vmovdqu8", "{{{mask}}}, {a}"),
4549+
p = in(reg) mem_addr,
4550+
mask = in(kreg) mask,
4551+
a = in(xmm_reg) a,
4552+
options(nostack)
45514553
);
45524554
}
45534555

0 commit comments

Comments
 (0)