Skip to content

Commit 3d3ef9d

Browse files
phoebewang authored and tru committed
[X86][FP16] Add the missing legal action for EXTRACT_SUBVECTOR
Fixes #57340. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D132563. (Cherry picked from commit 12b203e.)
1 parent 9c29291 commit 3d3ef9d

File tree

4 files changed

+337
-34
lines changed

4 files changed

+337
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -1521,7 +1521,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
15211521
// Extract subvector is special because the value type
15221522
// (result) is 128-bit but the source is 256-bit wide.
15231523
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1524-
MVT::v4f32, MVT::v2f64 }) {
1524+
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
15251525
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
15261526
}
15271527

@@ -1861,7 +1861,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
18611861
// (result) is 256-bit but the source is 512-bit wide.
18621862
// 128-bit was made Legal under AVX1.
18631863
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1864-
MVT::v8f32, MVT::v4f64 })
1864+
MVT::v16f16, MVT::v8f32, MVT::v4f64 })
18651865
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
18661866

18671867
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,

llvm/test/CodeGen/X86/avx512-f16c-v16f16-fadd.ll

+10-16
Original file line number | Diff line number | Diff line change
@@ -4,23 +4,17 @@
44
define <16 x half> @foo(<16 x half> %a, <16 x half> %b) nounwind {
55
; CHECK-LABEL: foo:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: pushq %rbp
8-
; CHECK-NEXT: movq %rsp, %rbp
9-
; CHECK-NEXT: andq $-32, %rsp
10-
; CHECK-NEXT: subq $96, %rsp
11-
; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
12-
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
13-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm0
14-
; CHECK-NEXT: vcvtph2ps (%rsp), %ymm1
15-
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
7+
; CHECK-NEXT: vcvtph2ps %xmm1, %ymm2
8+
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm3
9+
; CHECK-NEXT: vaddps %ymm2, %ymm3, %ymm2
10+
; CHECK-NEXT: vcvtps2ph $4, %ymm2, %xmm2
11+
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
12+
; CHECK-NEXT: vcvtph2ps %xmm1, %ymm1
13+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
14+
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0
15+
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
1616
; CHECK-NEXT: vcvtps2ph $4, %ymm0, %xmm0
17-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm1
18-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm2
19-
; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1
20-
; CHECK-NEXT: vcvtps2ph $4, %ymm1, %xmm1
21-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
22-
; CHECK-NEXT: movq %rbp, %rsp
23-
; CHECK-NEXT: popq %rbp
17+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2418
; CHECK-NEXT: retq
2519
%1 = fadd <16 x half> %a, %b
2620
ret <16 x half> %1

llvm/test/CodeGen/X86/avx512-skx-v32f16-fadd.ll

+10-16
Original file line number | Diff line number | Diff line change
@@ -4,23 +4,17 @@
44
define <32 x half> @foo(<32 x half> %a, <32 x half> %b) nounwind {
55
; CHECK-LABEL: foo:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: pushq %rbp
8-
; CHECK-NEXT: movq %rsp, %rbp
9-
; CHECK-NEXT: andq $-64, %rsp
10-
; CHECK-NEXT: subq $192, %rsp
11-
; CHECK-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
12-
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
13-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm0
14-
; CHECK-NEXT: vcvtph2ps (%rsp), %zmm1
15-
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
7+
; CHECK-NEXT: vcvtph2ps %ymm1, %zmm2
8+
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm3
9+
; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm2
10+
; CHECK-NEXT: vcvtps2ph $4, %zmm2, %ymm2
11+
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
12+
; CHECK-NEXT: vcvtph2ps %ymm1, %zmm1
13+
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
14+
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
15+
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
1616
; CHECK-NEXT: vcvtps2ph $4, %zmm0, %ymm0
17-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm1
18-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm2
19-
; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
20-
; CHECK-NEXT: vcvtps2ph $4, %zmm1, %ymm1
21-
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
22-
; CHECK-NEXT: movq %rbp, %rsp
23-
; CHECK-NEXT: popq %rbp
17+
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
2418
; CHECK-NEXT: retq
2519
%1 = fadd <32 x half> %a, %b
2620
ret <32 x half> %1

0 commit comments

Comments
 (0)