Skip to content

Commit 4c28d21

Browse files
authored
1 parent 4ba1800 commit 4c28d21

File tree

3 files changed

+333
-229
lines changed

3 files changed

+333
-229
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+9-6
Original file line numberDiff line numberDiff line change
@@ -6237,7 +6237,8 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
62376237
// Some float -> int -> float conversion patterns for which we want to keep the
62386238
// int values in FP registers using the corresponding NEON instructions to
62396239
// avoid more costly int <-> fp register transfers.
6240-
let Predicates = [HasNEONandIsStreamingSafe] in {
6240+
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
6241+
let Predicates = [HasNEON] in {
62416242
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
62426243
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
62436244
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6247,7 +6248,8 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
62476248
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
62486249
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
62496250

6250-
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
6251+
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
6252+
let Predicates = [HasNEON, HasFullFP16] in {
62516253
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
62526254
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
62536255
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6270,9 +6272,10 @@ def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
62706272

62716273
// fp16: integer extraction from a vector must be at least 32 bits to be legal.
62726274
// The actual extraction result is then an in-reg sign-extension of the lower 16 bits.
6273-
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
6274-
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
6275-
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
6275+
// TODO: Allow these in streaming[-compatible] functions with +sme2p2.
6276+
let Predicates = [HasNEON, HasFullFP16] in {
6277+
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
6278+
(v8i16 FPR128:$Rn), (i64 0))), i16)))),
62766279
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
62776280

62786281
// The unsigned 32-bit extracted element is truncated to 16 bits using AND
@@ -6367,7 +6370,7 @@ def : Pat <(f64 (uint_to_fp (i32
63676370
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
63686371
// 64-bit -> double conversions are handled in a target-specific DAG combine:
63696372
// performIntToFpCombine.
6370-
} // let Predicates = [HasNEONandIsStreamingSafe]
6373+
} // let Predicates = [HasNEON]
63716374

63726375
//===----------------------------------------------------------------------===//
63736376
// Advanced SIMD three different-sized vector instructions.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
3+
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
define double @t1(double %x) {
8+
; CHECK-LABEL: t1:
9+
; CHECK: // %bb.0: // %entry
10+
; CHECK-NEXT: fcvtzs x8, d0
11+
; CHECK-NEXT: scvtf d0, x8
12+
; CHECK-NEXT: ret
13+
;
14+
; NON-STREAMING-LABEL: t1:
15+
; NON-STREAMING: // %bb.0: // %entry
16+
; NON-STREAMING-NEXT: fcvtzs d0, d0
17+
; NON-STREAMING-NEXT: scvtf d0, d0
18+
; NON-STREAMING-NEXT: ret
19+
entry:
20+
%conv = fptosi double %x to i64
21+
%conv1 = sitofp i64 %conv to double
22+
ret double %conv1
23+
}
24+
25+
define float @t2(float %x) {
26+
; CHECK-LABEL: t2:
27+
; CHECK: // %bb.0: // %entry
28+
; CHECK-NEXT: fcvtzs w8, s0
29+
; CHECK-NEXT: scvtf s0, w8
30+
; CHECK-NEXT: ret
31+
;
32+
; NON-STREAMING-LABEL: t2:
33+
; NON-STREAMING: // %bb.0: // %entry
34+
; NON-STREAMING-NEXT: fcvtzs s0, s0
35+
; NON-STREAMING-NEXT: scvtf s0, s0
36+
; NON-STREAMING-NEXT: ret
37+
entry:
38+
%conv = fptosi float %x to i32
39+
%conv1 = sitofp i32 %conv to float
40+
ret float %conv1
41+
}
42+
43+
define half @t3(half %x) {
44+
; CHECK-LABEL: t3:
45+
; CHECK: // %bb.0: // %entry
46+
; CHECK-NEXT: fcvt s0, h0
47+
; CHECK-NEXT: fcvtzs w8, s0
48+
; CHECK-NEXT: scvtf s0, w8
49+
; CHECK-NEXT: fcvt h0, s0
50+
; CHECK-NEXT: ret
51+
;
52+
; NON-STREAMING-LABEL: t3:
53+
; NON-STREAMING: // %bb.0: // %entry
54+
; NON-STREAMING-NEXT: fcvt s0, h0
55+
; NON-STREAMING-NEXT: fcvtzs s0, s0
56+
; NON-STREAMING-NEXT: scvtf s0, s0
57+
; NON-STREAMING-NEXT: fcvt h0, s0
58+
; NON-STREAMING-NEXT: ret
59+
entry:
60+
%conv = fptosi half %x to i32
61+
%conv1 = sitofp i32 %conv to half
62+
ret half %conv1
63+
}
64+
65+
define double @t4(double %x) {
66+
; CHECK-LABEL: t4:
67+
; CHECK: // %bb.0: // %entry
68+
; CHECK-NEXT: fcvtzu x8, d0
69+
; CHECK-NEXT: ucvtf d0, x8
70+
; CHECK-NEXT: ret
71+
;
72+
; NON-STREAMING-LABEL: t4:
73+
; NON-STREAMING: // %bb.0: // %entry
74+
; NON-STREAMING-NEXT: fcvtzu d0, d0
75+
; NON-STREAMING-NEXT: ucvtf d0, d0
76+
; NON-STREAMING-NEXT: ret
77+
entry:
78+
%conv = fptoui double %x to i64
79+
%conv1 = uitofp i64 %conv to double
80+
ret double %conv1
81+
}
82+
83+
define float @t5(float %x) {
84+
; CHECK-LABEL: t5:
85+
; CHECK: // %bb.0: // %entry
86+
; CHECK-NEXT: fcvtzu w8, s0
87+
; CHECK-NEXT: ucvtf s0, w8
88+
; CHECK-NEXT: ret
89+
;
90+
; NON-STREAMING-LABEL: t5:
91+
; NON-STREAMING: // %bb.0: // %entry
92+
; NON-STREAMING-NEXT: fcvtzu s0, s0
93+
; NON-STREAMING-NEXT: ucvtf s0, s0
94+
; NON-STREAMING-NEXT: ret
95+
entry:
96+
%conv = fptoui float %x to i32
97+
%conv1 = uitofp i32 %conv to float
98+
ret float %conv1
99+
}
100+
101+
define half @t6(half %x) {
102+
; CHECK-LABEL: t6:
103+
; CHECK: // %bb.0: // %entry
104+
; CHECK-NEXT: fcvt s0, h0
105+
; CHECK-NEXT: fcvtzu w8, s0
106+
; CHECK-NEXT: ucvtf s0, w8
107+
; CHECK-NEXT: fcvt h0, s0
108+
; CHECK-NEXT: ret
109+
;
110+
; NON-STREAMING-LABEL: t6:
111+
; NON-STREAMING: // %bb.0: // %entry
112+
; NON-STREAMING-NEXT: fcvt s0, h0
113+
; NON-STREAMING-NEXT: fcvtzu s0, s0
114+
; NON-STREAMING-NEXT: ucvtf s0, s0
115+
; NON-STREAMING-NEXT: fcvt h0, s0
116+
; NON-STREAMING-NEXT: ret
117+
entry:
118+
%conv = fptoui half %x to i32
119+
%conv1 = uitofp i32 %conv to half
120+
ret half %conv1
121+
}

0 commit comments

Comments
 (0)