
Commit ff313ee

[RISCV] Remove hasSideEffects=1 for vsetvli pseudos (#91319)
In a similar vein to #90049, we currently model all of the effects of a vsetvli pseudo:

* VL and VTYPE are marked as defs.
* VL-preserving x0,x0 vsetvlis don't get emitted until RISCVInsertVSETVLI, and when they are, they carry implicit uses of VL.
* Regular vector pseudos are fully modelled too: before RISCVInsertVSETVLI they can be moved between vsetvli pseudos, because we will eventually insert vsetvlis to correct VL and VTYPE; afterwards, they have implicit uses on VL and VTYPE.

Since we model everything, we can remove hasSideEffects=1. This gives us some improvements, like sinking in vsetvli-insert-crossbb.ll. We need to update RISCVDeadRegisterDefinitions to keep handling vsetvli pseudos, since it only operates on instructions with unmodelled side effects.
1 parent 8e00703 commit ff313ee

36 files changed: +1107, -1166 lines
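The commit message above refers to "VL preserving x0,x0 vsetvlis". As a hedged illustration (the operands below are invented for the example and do not appear in this patch), the two vsetvli forms differ as follows:

    # Normal form: take the requested AVL from a1, write the granted VL to a0,
    # and set VTYPE to e32/m1. Both VL and VTYPE are (re)defined.
    vsetvli a0, a1, e32, m1, ta, ma

    # VL-preserving x0,x0 form: keep the current VL and only update VTYPE.
    # Per the spec this encoding is only legal when the new SEW/LMUL ratio
    # leaves VLMAX unchanged, so it effectively reads the current VL - the
    # implicit VL use RISCVInsertVSETVLI attaches when it emits this form.
    vsetvli x0, x0, e32, m1, ta, ma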

llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp

Lines changed: 3 additions & 1 deletion
@@ -72,7 +72,9 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
       // are reserved for HINT instructions.
       const MCInstrDesc &Desc = MI.getDesc();
       if (!Desc.mayLoad() && !Desc.mayStore() &&
-          !Desc.hasUnmodeledSideEffects())
+          !Desc.hasUnmodeledSideEffects() &&
+          MI.getOpcode() != RISCV::PseudoVSETVLI &&
+          MI.getOpcode() != RISCV::PseudoVSETIVLI)
         continue;
       // For PseudoVSETVLIX0, Rd = X0 has special meaning.
       if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
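
The extra opcode checks are needed because this pass previously only reached vsetvli pseudos via hasUnmodeledSideEffects(), which this commit clears. Keeping the pseudos visible lets the pass still replace a dead VL output with x0. A minimal sketch, with registers chosen arbitrarily for illustration:

    # Before: the new VL is written to a0, but a0 has no further uses.
    vsetvli a0, a1, e32, m1, ta, ma
    # After the dead-definition rewrite: discard the scalar result via x0;
    # VL and VTYPE are still updated from a1. (The rd=x0, rs1=x0 encoding is
    # excluded above because it has its own special meaning.)
    vsetvli x0, a1, e32, m1, ta, ma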

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 1 addition & 1 deletion
@@ -6181,7 +6181,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0,
 //===----------------------------------------------------------------------===//
 
 // Pseudos.
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
 // Due to rs1=X0 having special meaning, we need a GPRNoX0 register class for
 // the when we aren't using one of the special X0 encodings. Otherwise it could
 // be accidentally be made X0 by MachineIR optimizations. To satisfy the

llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll

Lines changed: 25 additions & 13 deletions
@@ -236,41 +236,53 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
 ; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    slli a1, a1, 4
 ; CHECK-NEXT:    add a1, sp, a1
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add a3, a2, a1
-; CHECK-NEXT:    vl8re32.v v8, (a3)
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    add a1, a0, a1
-; CHECK-NEXT:    vl8re32.v v0, (a0)
 ; CHECK-NEXT:    vl8re32.v v8, (a1)
-; CHECK-NEXT:    vl8re32.v v16, (a2)
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re32.v v0, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vl8re32.v v8, (a3)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re32.v v16, (a2)
 ; CHECK-NEXT:    vadd.vv v0, v24, v0
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    slli a0, a0, 4
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vadd.vv v8, v24, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v24, v24, v8
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vadd.vv v8, v24, v8
 ; CHECK-NEXT:    vadd.vv v24, v0, v16
 ; CHECK-NEXT:    vadd.vx v16, v8, a4
 ; CHECK-NEXT:    vadd.vx v8, v24, a4
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret

llvm/test/CodeGen/RISCV/rvv/calling-conv.ll

Lines changed: 4 additions & 4 deletions
@@ -39,11 +39,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
 ; RV32-NEXT:    vs8r.v v8, (a0)
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 3
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    vs8r.v v16, (a0)
+; RV32-NEXT:    add a1, a0, a1
 ; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    addi a0, sp, 128
+; RV32-NEXT:    vs8r.v v16, (a1)
 ; RV32-NEXT:    vmv.v.i v16, 0
 ; RV32-NEXT:    call callee_scalable_vector_split_indirect
 ; RV32-NEXT:    addi sp, s0, -144
@@ -70,11 +70,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
 ; RV64-NEXT:    vs8r.v v8, (a0)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    slli a1, a1, 3
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vs8r.v v16, (a0)
+; RV64-NEXT:    add a1, a0, a1
 ; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
 ; RV64-NEXT:    addi a0, sp, 128
+; RV64-NEXT:    vs8r.v v16, (a1)
 ; RV64-NEXT:    vmv.v.i v16, 0
 ; RV64-NEXT:    call callee_scalable_vector_split_indirect
 ; RV64-NEXT:    addi sp, s0, -144

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll

Lines changed: 12 additions & 12 deletions
@@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %r
@@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %r
@@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x half> %r
@@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x half> %r
@@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <32 x half> %r
@@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x float> %r
@@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x float> %r
@@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x float> %r
@@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <16 x float> %r
@@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x double> %r
@@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v10, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x double> %r
@@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
 ; CHECK-NEXT:    frflags a0
 ; CHECK-NEXT:    vfcvt.x.f.v v12, v8, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    fsflags a0
 ; CHECK-NEXT:    ret
   %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <8 x double> %r

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll

Lines changed: 12 additions & 12 deletions
@@ -359,13 +359,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    feq.d a0, fa3, fa3
 ; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT:    fld fa2, 40(sp)
 ; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
+; RV32-NEXT:    fld fa3, 40(sp)
 ; RV32-NEXT:    neg a0, a0
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    feq.d a2, fa2, fa2
-; RV32-NEXT:    fmax.d fa3, fa2, fa5
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT:    feq.d a2, fa3, fa3
+; RV32-NEXT:    fmax.d fa3, fa3, fa5
 ; RV32-NEXT:    fmin.d fa3, fa3, fa4
 ; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
 ; RV32-NEXT:    fld fa3, 32(sp)
@@ -460,13 +460,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    feq.d a0, fa3, fa3
 ; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT:    fld fa2, 40(sp)
 ; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
+; RV64-NEXT:    fld fa3, 40(sp)
 ; RV64-NEXT:    neg a0, a0
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    feq.d a2, fa2, fa2
-; RV64-NEXT:    fmax.d fa3, fa2, fa5
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT:    feq.d a2, fa3, fa3
+; RV64-NEXT:    fmax.d fa3, fa3, fa5
 ; RV64-NEXT:    fmin.d fa3, fa3, fa4
 ; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
 ; RV64-NEXT:    fld fa3, 32(sp)
@@ -557,7 +557,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32-NEXT:    vfmv.f.s fa4, v8
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; RV32-NEXT:    fld fa2, 40(sp)
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
@@ -566,9 +565,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV32-NEXT:    fmin.d fa2, fa2, fa5
 ; RV32-NEXT:    fcvt.wu.d a2, fa2, rtz
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
-; RV32-NEXT:    fld fa2, 48(sp)
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
+; RV32-NEXT:    fld fa2, 48(sp)
 ; RV32-NEXT:    fcvt.wu.d a3, fa4, rtz
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; RV32-NEXT:    vslide1down.vx v8, v10, a0
 ; RV32-NEXT:    fmax.d fa4, fa2, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
@@ -633,7 +633,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-NEXT:    vfmv.f.s fa4, v8
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; RV64-NEXT:    fld fa2, 40(sp)
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
@@ -642,9 +641,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
 ; RV64-NEXT:    fmin.d fa2, fa2, fa5
 ; RV64-NEXT:    fcvt.lu.d a2, fa2, rtz
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
-; RV64-NEXT:    fld fa2, 48(sp)
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
+; RV64-NEXT:    fld fa2, 48(sp)
 ; RV64-NEXT:    fcvt.lu.d a3, fa4, rtz
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; RV64-NEXT:    vslide1down.vx v8, v10, a0
 ; RV64-NEXT:    fmax.d fa4, fa2, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
