Skip to content

Commit 476c0cc

Browse files
committed
[AArch64][SVE] Avoid transfer to GPRs for fp -> int -> fp conversions
When Neon is not available, use the SVE variants of FCVTZS, FCVTZU, UCVTF, and SCVTF for fp -> int -> fp conversions to avoid moving values to/from GPRs, which may be expensive. Note: with +sme2p2, the single-element vector Neon variants of these instructions could be used instead (but that feature is not implemented yet). Follow-up to llvm#112213.
1 parent df05512 commit 476c0cc

File tree

2 files changed

+107
-17
lines changed

2 files changed

+107
-17
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

+35
Original file line numberDiff line numberDiff line change
@@ -2421,6 +2421,41 @@ let Predicates = [HasSVEorSME] in {
24212421
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
24222422
} // End HasSVEorSME
24232423

2424+
// Helper for creating fp -> int -> fp conversions using SVE.
//
// Template parameters:
//   PTRUE    - instruction used to build the governing predicate; it is
//              instantiated as (PTRUE 1) for both conversions. The accompanying
//              test expects this to print as "ptrue pN.<T>, vl1".
//   FROM_INT - int -> fp conversion instruction (outer operation).
//   TO_INT   - fp -> int conversion instruction (inner operation).
//   sub      - sub-register index used both to place the scalar $Rn into the
//              wider register and to read the scalar result back out.
//
// Expansion, innermost first:
//   1. INSERT_SUBREG puts $Rn into an otherwise undefined (IMPLICIT_DEF)
//      register at index `sub`.
//   2. TO_INT converts it; its first operand is IMPLICIT_DEF (presumably the
//      merge/passthru input of the predicated instruction - confirm against the
//      instruction definitions).
//   3. FROM_INT converts the integer result back, again with an IMPLICIT_DEF
//      first operand.
//   4. EXTRACT_SUBREG returns the `sub` sub-register of the result.
2425+
class sve_fp_int_fp_cvt<Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
2426+
: OutPatFrag<(ops node: $Rn),
2427+
(EXTRACT_SUBREG
2428+
(FROM_INT (IMPLICIT_DEF), (PTRUE 1),
2429+
(TO_INT (IMPLICIT_DEF), (PTRUE 1),
2430+
(INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>;
2431+
2432+
// Some float -> int -> float conversion patterns where we want to keep the int
2433+
// values in FP registers using the SVE instructions to avoid costly GPR <-> FPR
2434+
// register transfers. Only used when NEON is not available (e.g. in streaming
2435+
// functions).
2436+
// TODO: When +sme2p2 is available single-element vectors should be preferred.
//
// Each pattern matches a scalar fp -> int -> fp round trip and rewrites it with
// sve_fp_int_fp_cvt<PTRUE, FROM_INT, TO_INT, sub>. The integer element width of
// the chosen instructions must equal the integer type named in the matched
// pattern, otherwise the intermediate value saturates at a different range.
//
// HasNoNEON is true when Subtarget->isNeonAvailable() returns false.
2437+
def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">;
2438+
let Predicates = [HasSVEorSME, HasNoNEON] in {
2439+
def : Pat<
2440+
(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
2441+
(sve_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
2442+
def : Pat<
2443+
(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
2444+
(sve_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
2445+
def : Pat<
2446+
(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
2447+
(sve_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
2448+
def : Pat<
2449+
(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
2450+
(sve_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
// The f16 patterns round-trip through i32, so they use the instruction forms
// with a 32-bit integer element (HtoS / StoH) and a .s predicate. The HtoH
// forms convert via a 16-bit integer and saturate there, which changes the
// result for half values that fit in i32 but not in i16 (e.g. 40000.0).
// NOTE: the autogenerated CHECK lines for the f16 tests (t3, t6) need to be
// regenerated with utils/update_llc_test_checks.py to match.
2451+
def : Pat<
2452+
(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
2453+
(sve_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoH, FCVTZS_ZPmZ_HtoS, hsub> $Rn)>;
2454+
def : Pat<
2455+
(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
2456+
(sve_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoH, FCVTZU_ZPmZ_HtoS, hsub> $Rn)>;
2457+
} // End HasSVEorSME, HasNoNEON
2458+
24242459
let Predicates = [HasBF16, HasSVEorSME] in {
24252460
defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
24262461
defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

+72-17
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
2+
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3+
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
34
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING
45

56
target triple = "aarch64-unknown-linux-gnu"
67

78
define double @t1(double %x) {
89
; CHECK-LABEL: t1:
910
; CHECK: // %bb.0: // %entry
10-
; CHECK-NEXT: fcvtzs x8, d0
11-
; CHECK-NEXT: scvtf d0, x8
11+
; CHECK-NEXT: ptrue p0.d, vl1
12+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
13+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
14+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
15+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1216
; CHECK-NEXT: ret
1317
;
18+
; NONEON-NOSVE-LABEL: t1:
19+
; NONEON-NOSVE: // %bb.0: // %entry
20+
; NONEON-NOSVE-NEXT: fcvtzs x8, d0
21+
; NONEON-NOSVE-NEXT: scvtf d0, x8
22+
; NONEON-NOSVE-NEXT: ret
23+
;
1424
; NON-STREAMING-LABEL: t1:
1525
; NON-STREAMING: // %bb.0: // %entry
1626
; NON-STREAMING-NEXT: fcvtzs d0, d0
@@ -25,10 +35,19 @@ entry:
2535
define float @t2(float %x) {
2636
; CHECK-LABEL: t2:
2737
; CHECK: // %bb.0: // %entry
28-
; CHECK-NEXT: fcvtzs w8, s0
29-
; CHECK-NEXT: scvtf s0, w8
38+
; CHECK-NEXT: ptrue p0.s, vl1
39+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
40+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
41+
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
42+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
3043
; CHECK-NEXT: ret
3144
;
45+
; NONEON-NOSVE-LABEL: t2:
46+
; NONEON-NOSVE: // %bb.0: // %entry
47+
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
48+
; NONEON-NOSVE-NEXT: scvtf s0, w8
49+
; NONEON-NOSVE-NEXT: ret
50+
;
3251
; NON-STREAMING-LABEL: t2:
3352
; NON-STREAMING: // %bb.0: // %entry
3453
; NON-STREAMING-NEXT: fcvtzs s0, s0
@@ -43,12 +62,21 @@ entry:
4362
define half @t3(half %x) {
4463
; CHECK-LABEL: t3:
4564
; CHECK: // %bb.0: // %entry
46-
; CHECK-NEXT: fcvt s0, h0
47-
; CHECK-NEXT: fcvtzs w8, s0
48-
; CHECK-NEXT: scvtf s0, w8
49-
; CHECK-NEXT: fcvt h0, s0
65+
; CHECK-NEXT: ptrue p0.h, vl1
66+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
67+
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
68+
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
69+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
5070
; CHECK-NEXT: ret
5171
;
72+
; NONEON-NOSVE-LABEL: t3:
73+
; NONEON-NOSVE: // %bb.0: // %entry
74+
; NONEON-NOSVE-NEXT: fcvt s0, h0
75+
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
76+
; NONEON-NOSVE-NEXT: scvtf s0, w8
77+
; NONEON-NOSVE-NEXT: fcvt h0, s0
78+
; NONEON-NOSVE-NEXT: ret
79+
;
5280
; NON-STREAMING-LABEL: t3:
5381
; NON-STREAMING: // %bb.0: // %entry
5482
; NON-STREAMING-NEXT: fcvt s0, h0
@@ -65,10 +93,19 @@ entry:
6593
define double @t4(double %x) {
6694
; CHECK-LABEL: t4:
6795
; CHECK: // %bb.0: // %entry
68-
; CHECK-NEXT: fcvtzu x8, d0
69-
; CHECK-NEXT: ucvtf d0, x8
96+
; CHECK-NEXT: ptrue p0.d, vl1
97+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
98+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
99+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
100+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
70101
; CHECK-NEXT: ret
71102
;
103+
; NONEON-NOSVE-LABEL: t4:
104+
; NONEON-NOSVE: // %bb.0: // %entry
105+
; NONEON-NOSVE-NEXT: fcvtzu x8, d0
106+
; NONEON-NOSVE-NEXT: ucvtf d0, x8
107+
; NONEON-NOSVE-NEXT: ret
108+
;
72109
; NON-STREAMING-LABEL: t4:
73110
; NON-STREAMING: // %bb.0: // %entry
74111
; NON-STREAMING-NEXT: fcvtzu d0, d0
@@ -83,10 +120,19 @@ entry:
83120
define float @t5(float %x) {
84121
; CHECK-LABEL: t5:
85122
; CHECK: // %bb.0: // %entry
86-
; CHECK-NEXT: fcvtzu w8, s0
87-
; CHECK-NEXT: ucvtf s0, w8
123+
; CHECK-NEXT: ptrue p0.s, vl1
124+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
125+
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
126+
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
127+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
88128
; CHECK-NEXT: ret
89129
;
130+
; NONEON-NOSVE-LABEL: t5:
131+
; NONEON-NOSVE: // %bb.0: // %entry
132+
; NONEON-NOSVE-NEXT: fcvtzu w8, s0
133+
; NONEON-NOSVE-NEXT: ucvtf s0, w8
134+
; NONEON-NOSVE-NEXT: ret
135+
;
90136
; NON-STREAMING-LABEL: t5:
91137
; NON-STREAMING: // %bb.0: // %entry
92138
; NON-STREAMING-NEXT: fcvtzu s0, s0
@@ -101,12 +147,21 @@ entry:
101147
define half @t6(half %x) {
102148
; CHECK-LABEL: t6:
103149
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: fcvt s0, h0
105-
; CHECK-NEXT: fcvtzu w8, s0
106-
; CHECK-NEXT: ucvtf s0, w8
107-
; CHECK-NEXT: fcvt h0, s0
150+
; CHECK-NEXT: ptrue p0.h, vl1
151+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
152+
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
153+
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
154+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
108155
; CHECK-NEXT: ret
109156
;
157+
; NONEON-NOSVE-LABEL: t6:
158+
; NONEON-NOSVE: // %bb.0: // %entry
159+
; NONEON-NOSVE-NEXT: fcvt s0, h0
160+
; NONEON-NOSVE-NEXT: fcvtzu w8, s0
161+
; NONEON-NOSVE-NEXT: ucvtf s0, w8
162+
; NONEON-NOSVE-NEXT: fcvt h0, s0
163+
; NONEON-NOSVE-NEXT: ret
164+
;
110165
; NON-STREAMING-LABEL: t6:
111166
; NON-STREAMING: // %bb.0: // %entry
112167
; NON-STREAMING-NEXT: fcvt s0, h0

0 commit comments

Comments
 (0)