Skip to content

Commit 22c3ba4

Browse files
lizhijin1024vfdff
authored and committed
[SVE] Add patterns for shift intrinsics with FalseLanesZero mode
This patch adds patterns that remove redundant mov and sel instructions for shift intrinsics in FalseLanesZero mode when FeatureExperimentalZeroingPseudos is supported. For example, before: mov z1.b, #0; sel z0.b, p0, z0.b, z1.b; asr z0.b, p0/m, z0.b, #7. After: movprfx z0.b, p0/z, z0.b; asr z0.b, p0/m, z0.b, #7. Reviewed By: paulwalker-arm. Differential Revision: https://reviews.llvm.org/D145551
1 parent 57aeb30 commit 22c3ba4

File tree

3 files changed

+200
-0
lines changed

3 files changed

+200
-0
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,6 +2060,10 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
20602060
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
20612061
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
20622062
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
2063+
2064+
defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
2065+
defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
2066+
defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
20632067
} // End HasSVEorSME, UseExperimentalZeroingPseudos
20642068

20652069
let Predicates = [HasSVEorSME] in {

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,12 @@ class SVE_Shift_DupImm_Any_Predicate_Pat<ValueType vt, SDPatternOperator op,
571571
: Pat<(vt (op (pt (SVEAnyPredicate)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
572572
(inst $Rn, i32:$imm)>;
573573

574+
// Selection pattern for a predicated two-operand operation whose first operand
// is a select between $Op1 and SVEDup0 under the same predicate, and whose
// second operand is a splatted immediate.
//
// Matches:  (op $Pg, (vselect $Pg, $Op1, SVEDup0), (splat_vector (cpx $imm)))
// Emits:    (inst $Pg, $Op1, $imm)
// so the explicit select is folded into the selected instruction.
//
//   vt   - vector value type of the result and of $Op1
//   op   - operator being matched
//   pt   - predicate value type of $Pg
//   it   - scalar type of the value wrapped by splat_vector
//   cpx  - ComplexPattern used to match the i32 immediate
//   inst - instruction emitted for the match
//
// NOTE(review): SVEDup0 is defined outside this hunk; presumably it matches an
// all-zero vector - confirm against its definition.
class SVE_2_Op_Imm_Pat_Zero<ValueType vt, SDPatternOperator op, ValueType pt,
575+
ValueType it, ComplexPattern cpx, Instruction inst>
576+
: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Op1, (SVEDup0)),
577+
(vt (splat_vector (it (cpx i32:$imm)))))),
578+
(inst $Pg, $Op1, i32:$imm)>;
579+
574580
class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
575581
ValueType pt, ValueType it,
576582
FPImmLeaf immL, int imm,
@@ -5894,6 +5900,20 @@ multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
58945900
def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
58955901
}
58965902

5903+
// For each element size (B, H, S, D) this multiclass defines:
//   * a _ZERO_<size> PredTwoOpImmPseudo tagged FalseLanesZero, parameterised
//     with NAME # _<size>, the matching ZPR register class and an i32
//     immediate operand; and
//   * an SVE_2_Op_Imm_Pat_Zero pattern that selects that pseudo for `op`.
//
//   op                          - operator the patterns match
//   imm_b, imm_h, imm_s, imm_d  - ComplexPatterns matching the immediate for
//                                 the B, H, S and D element sizes respectively
//
// The D pattern passes i64 as the splat scalar type; the B, H and S patterns
// pass i32.
multiclass sve_int_bin_pred_imm_zeroing_bhsd<SDPatternOperator op,
5904+
ComplexPattern imm_b, ComplexPattern imm_h,
5905+
ComplexPattern imm_s, ComplexPattern imm_d> {
5906+
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesZero>;
5907+
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesZero>;
5908+
def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesZero>;
5909+
def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesZero>;
5910+
5911+
def : SVE_2_Op_Imm_Pat_Zero<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Pseudo>(NAME # _ZERO_B)>;
5912+
def : SVE_2_Op_Imm_Pat_Zero<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Pseudo>(NAME # _ZERO_H)>;
5913+
def : SVE_2_Op_Imm_Pat_Zero<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Pseudo>(NAME # _ZERO_S)>;
5914+
def : SVE_2_Op_Imm_Pat_Zero<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Pseudo>(NAME # _ZERO_D)>;
5915+
}
5916+
58975917
multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
58985918
SDPatternOperator op> {
58995919
def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
3+
4+
;; ASR: each test shifts a zero-selected input by its element width (8/16/32/64).
5+
define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
6+
; CHECK-LABEL: asr_i8:
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: movprfx z0.b, p0/z, z0.b
9+
; CHECK-NEXT: asr z0.b, p0/m, z0.b, #8
10+
; CHECK-NEXT: ret
11+
%vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
12+
%ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
13+
%shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
14+
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
15+
ret <vscale x 16 x i8> %res
16+
}
17+
18+
define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
19+
; CHECK-LABEL: asr_i16:
20+
; CHECK: // %bb.0:
21+
; CHECK-NEXT: movprfx z0.h, p0/z, z0.h
22+
; CHECK-NEXT: asr z0.h, p0/m, z0.h, #16
23+
; CHECK-NEXT: ret
24+
%vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
25+
%ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
26+
%shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
27+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
28+
ret <vscale x 8 x i16> %res
29+
}
30+
31+
define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
32+
; CHECK-LABEL: asr_i32:
33+
; CHECK: // %bb.0:
34+
; CHECK-NEXT: movprfx z0.s, p0/z, z0.s
35+
; CHECK-NEXT: asr z0.s, p0/m, z0.s, #32
36+
; CHECK-NEXT: ret
37+
%vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
38+
%ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
39+
%shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
40+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
41+
ret <vscale x 4 x i32> %res
42+
}
43+
44+
define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
45+
; CHECK-LABEL: asr_i64:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
48+
; CHECK-NEXT: asr z0.d, p0/m, z0.d, #64
49+
; CHECK-NEXT: ret
50+
%vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
51+
%ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
52+
%shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
53+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
54+
ret <vscale x 2 x i64> %res
55+
}
56+
57+
;; LSL: each test shifts a zero-selected input by its element width minus one (7/15/31/63).
58+
define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
59+
; CHECK-LABEL: lsl_i8:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: movprfx z0.b, p0/z, z0.b
62+
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
63+
; CHECK-NEXT: ret
64+
%vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
65+
%ele = insertelement <vscale x 16 x i8> poison, i8 7, i32 0
66+
%shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
67+
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
68+
ret <vscale x 16 x i8> %res
69+
}
70+
71+
define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
72+
; CHECK-LABEL: lsl_i16:
73+
; CHECK: // %bb.0:
74+
; CHECK-NEXT: movprfx z0.h, p0/z, z0.h
75+
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
76+
; CHECK-NEXT: ret
77+
%vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
78+
%ele = insertelement <vscale x 8 x i16> poison, i16 15, i32 0
79+
%shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
80+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
81+
ret <vscale x 8 x i16> %res
82+
}
83+
84+
define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
85+
; CHECK-LABEL: lsl_i32:
86+
; CHECK: // %bb.0:
87+
; CHECK-NEXT: movprfx z0.s, p0/z, z0.s
88+
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
89+
; CHECK-NEXT: ret
90+
%vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
91+
%ele = insertelement <vscale x 4 x i32> poison, i32 31, i32 0
92+
%shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
93+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
94+
ret <vscale x 4 x i32> %res
95+
}
96+
97+
define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
98+
; CHECK-LABEL: lsl_i64:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
101+
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
102+
; CHECK-NEXT: ret
103+
%vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
104+
%ele = insertelement <vscale x 2 x i64> poison, i64 63, i32 0
105+
%shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
106+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
107+
ret <vscale x 2 x i64> %res
108+
}
109+
110+
;; LSR: each test shifts a zero-selected input by its element width (8/16/32/64).
111+
define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
112+
; CHECK-LABEL: lsr_i8:
113+
; CHECK: // %bb.0:
114+
; CHECK-NEXT: movprfx z0.b, p0/z, z0.b
115+
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #8
116+
; CHECK-NEXT: ret
117+
%vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
118+
%ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
119+
%shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
120+
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
121+
ret <vscale x 16 x i8> %res
122+
}
123+
124+
define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
125+
; CHECK-LABEL: lsr_i16:
126+
; CHECK: // %bb.0:
127+
; CHECK-NEXT: movprfx z0.h, p0/z, z0.h
128+
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #16
129+
; CHECK-NEXT: ret
130+
%vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
131+
%ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
132+
%shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
133+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
134+
ret <vscale x 8 x i16> %res
135+
}
136+
137+
define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
138+
; CHECK-LABEL: lsr_i32:
139+
; CHECK: // %bb.0:
140+
; CHECK-NEXT: movprfx z0.s, p0/z, z0.s
141+
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #32
142+
; CHECK-NEXT: ret
143+
%vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
144+
%ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
145+
%shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
146+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
147+
ret <vscale x 4 x i32> %res
148+
}
149+
150+
define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
151+
; CHECK-LABEL: lsr_i64:
152+
; CHECK: // %bb.0:
153+
; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
154+
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #64
155+
; CHECK-NEXT: ret
156+
%vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
157+
%ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
158+
%shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
159+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
160+
ret <vscale x 2 x i64> %res
161+
}
162+
163+
declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
164+
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
165+
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
166+
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
167+
168+
declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
169+
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
170+
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
171+
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
172+
173+
declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
174+
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
175+
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
176+
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

0 commit comments

Comments
 (0)