Skip to content

Commit 12b203e

Browse files
committed
[X86][FP16] Add the missing legal action for EXTRACT_SUBVECTOR
Fixes #57340.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D132563
1 parent a8cd939 commit 12b203e

File tree

4 files changed, with 337 additions (+337) and 34 deletions (-34).

4 files changed

+337
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1520,7 +1520,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
15201520
// Extract subvector is special because the value type
15211521
// (result) is 128-bit but the source is 256-bit wide.
15221522
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1523-
MVT::v4f32, MVT::v2f64 }) {
1523+
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
15241524
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
15251525
}
15261526

@@ -1860,7 +1860,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
18601860
// (result) is 256-bit but the source is 512-bit wide.
18611861
// 128-bit was made Legal under AVX1.
18621862
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1863-
MVT::v8f32, MVT::v4f64 })
1863+
MVT::v16f16, MVT::v8f32, MVT::v4f64 })
18641864
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
18651865

18661866
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,

llvm/test/CodeGen/X86/avx512-f16c-v16f16-fadd.ll

+10-16
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,17 @@
44
define <16 x half> @foo(<16 x half> %a, <16 x half> %b) nounwind {
55
; CHECK-LABEL: foo:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: pushq %rbp
8-
; CHECK-NEXT: movq %rsp, %rbp
9-
; CHECK-NEXT: andq $-32, %rsp
10-
; CHECK-NEXT: subq $96, %rsp
11-
; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
12-
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
13-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm0
14-
; CHECK-NEXT: vcvtph2ps (%rsp), %ymm1
15-
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
7+
; CHECK-NEXT: vcvtph2ps %xmm1, %ymm2
8+
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm3
9+
; CHECK-NEXT: vaddps %ymm2, %ymm3, %ymm2
10+
; CHECK-NEXT: vcvtps2ph $4, %ymm2, %xmm2
11+
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
12+
; CHECK-NEXT: vcvtph2ps %xmm1, %ymm1
13+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
14+
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0
15+
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
1616
; CHECK-NEXT: vcvtps2ph $4, %ymm0, %xmm0
17-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm1
18-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm2
19-
; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1
20-
; CHECK-NEXT: vcvtps2ph $4, %ymm1, %xmm1
21-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
22-
; CHECK-NEXT: movq %rbp, %rsp
23-
; CHECK-NEXT: popq %rbp
17+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2418
; CHECK-NEXT: retq
2519
%1 = fadd <16 x half> %a, %b
2620
ret <16 x half> %1

llvm/test/CodeGen/X86/avx512-skx-v32f16-fadd.ll

+10-16
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,17 @@
44
define <32 x half> @foo(<32 x half> %a, <32 x half> %b) nounwind {
55
; CHECK-LABEL: foo:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: pushq %rbp
8-
; CHECK-NEXT: movq %rsp, %rbp
9-
; CHECK-NEXT: andq $-64, %rsp
10-
; CHECK-NEXT: subq $192, %rsp
11-
; CHECK-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
12-
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
13-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm0
14-
; CHECK-NEXT: vcvtph2ps (%rsp), %zmm1
15-
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
7+
; CHECK-NEXT: vcvtph2ps %ymm1, %zmm2
8+
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm3
9+
; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm2
10+
; CHECK-NEXT: vcvtps2ph $4, %zmm2, %ymm2
11+
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
12+
; CHECK-NEXT: vcvtph2ps %ymm1, %zmm1
13+
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
14+
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
15+
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
1616
; CHECK-NEXT: vcvtps2ph $4, %zmm0, %ymm0
17-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm1
18-
; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm2
19-
; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
20-
; CHECK-NEXT: vcvtps2ph $4, %zmm1, %ymm1
21-
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
22-
; CHECK-NEXT: movq %rbp, %rsp
23-
; CHECK-NEXT: popq %rbp
17+
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
2418
; CHECK-NEXT: retq
2519
%1 = fadd <32 x half> %a, %b
2620
ret <32 x half> %1

0 commit comments

Comments
 (0)