
Commit 3fbee98

ChuanqiXu9 authored and lanza committed
[CIR][Lowering] Introduce HoistAllocasPass (#887)
Close #883. See the above issue for details
1 parent eda03bf · commit 3fbee98

17 files changed: +214, -128 lines

clang/include/clang/CIR/Dialect/Passes.h (+1)

@@ -37,6 +37,7 @@ std::unique_ptr<Pass> createIdiomRecognizerPass(clang::ASTContext *astCtx);
 std::unique_ptr<Pass> createLibOptPass();
 std::unique_ptr<Pass> createLibOptPass(clang::ASTContext *astCtx);
 std::unique_ptr<Pass> createFlattenCFGPass();
+std::unique_ptr<Pass> createHoistAllocasPass();
 std::unique_ptr<Pass> createGotoSolverPass();
 
 /// Create a pass to lower ABI-independent function definitions/calls.

clang/include/clang/CIR/Dialect/Passes.td (+10)

@@ -107,6 +107,16 @@ def SCFPrepare : Pass<"cir-mlir-scf-prepare"> {
   let dependentDialects = ["cir::CIRDialect"];
 }
 
+def HoistAllocas : Pass<"cir-hoist-allocas"> {
+  let summary = "Hoist allocas to the entry of the function";
+  let description = [{
+    This pass hoist all non-dynamic allocas to the entry of the function.
+    This is helpful for later code generation.
+  }];
+  let constructor = "mlir::createHoistAllocasPass()";
+  let dependentDialects = ["cir::CIRDialect"];
+}
+
 def FlattenCFG : Pass<"cir-flatten-cfg"> {
   let summary = "Produces flatten cfg";
   let description = [{
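
Note (illustrative, not part of the patch): "non-dynamic" here means cir.alloca ops whose size is a compile-time constant; allocas carrying a dynamic size operand, e.g. from a C variable-length array, are deliberately left where they are. A rough C sketch of the distinction, assuming the usual lowering of locals to cir.alloca:

// Hypothetical example, not from this commit: which allocas cir-hoist-allocas moves.
void demo(int n) {
  for (int i = 0; i < n; ++i) {
    int fixed[16]; // constant-size local -> plain cir.alloca; eligible to be
                   // hoisted to the function's entry block
    int vla[n];    // size depends on 'n' at runtime -> alloca with a dynamic
                   // size operand; the pass skips it
    fixed[0] = i;
    vla[0] = i;
  }
}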

clang/lib/CIR/CodeGen/CIRPasses.cpp (+1)

@@ -102,6 +102,7 @@ mlir::LogicalResult runCIRToCIRPasses(
 namespace mlir {
 
 void populateCIRPreLoweringPasses(OpPassManager &pm) {
+  pm.addPass(createHoistAllocasPass());
   pm.addPass(createFlattenCFGPass());
   pm.addPass(createGotoSolverPass());
 }

clang/lib/CIR/Dialect/Transforms/CMakeLists.txt (+1)

@@ -13,6 +13,7 @@ add_clang_library(MLIRCIRTransforms
   GotoSolver.cpp
   SCFPrepare.cpp
   CallConvLowering.cpp
+  HoistAllocas.cpp
 
   DEPENDS
   MLIRCIRPassIncGen

clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp (new file, +65)

@@ -0,0 +1,65 @@
+//====- HoistAllocas.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/Dialect/Passes.h"
+
+#include "llvm/Support/TimeProfiler.h"
+
+using namespace mlir;
+using namespace mlir::cir;
+
+namespace {
+
+struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
+
+  HoistAllocasPass() = default;
+  void runOnOperation() override;
+};
+
+static void process(mlir::cir::FuncOp func) {
+  if (func.getRegion().empty())
+    return;
+
+  // Hoist all static allocas to the entry block.
+  mlir::Block &entryBlock = func.getRegion().front();
+  llvm::SmallVector<mlir::cir::AllocaOp> allocas;
+  func.getBody().walk([&](mlir::cir::AllocaOp alloca) {
+    if (alloca->getBlock() == &entryBlock)
+      return;
+    // Don't hoist allocas with dynamic alloca size.
+    if (alloca.getDynAllocSize())
+      return;
+    allocas.push_back(alloca);
+  });
+  if (allocas.empty())
+    return;
+
+  mlir::Operation *insertPoint = &*entryBlock.begin();
+
+  for (auto alloca : allocas)
+    alloca->moveBefore(insertPoint);
+}
+
+void HoistAllocasPass::runOnOperation() {
+  llvm::TimeTraceScope scope("Hoist Allocas");
+  SmallVector<Operation *, 16> ops;
+  getOperation()->walk([&](mlir::cir::FuncOp op) { process(op); });
+}
+
+} // namespace
+
+std::unique_ptr<Pass> mlir::createHoistAllocasPass() {
+  return std::make_unique<HoistAllocasPass>();
+}
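
For context (editorial sketch, not from the patch): LLVM only treats allocas in the entry block as static stack slots; allocas in later blocks are lowered as dynamic stack adjustments, so a fixed-size local whose cir.alloca lands inside a loop body after cir-flatten-cfg would re-allocate on every iteration. Hoisting such allocas up front reproduces the entry-block shape that classic clang CodeGen emits, e.g. for a function like the following hypothetical example:

// Hypothetical example, not from this commit. The storage for 'tmp' starts out
// as a fixed-size cir.alloca inside the loop body's scope; cir-hoist-allocas
// moves it to the entry block, so the later CIR-to-LLVM lowering can emit a
// single entry-block alloca instead of one per iteration.
int sum_doubled(const int *data, int n) {
  int total = 0;
  for (int i = 0; i < n; ++i) {
    int tmp = data[i] * 2;
    total += tmp;
  }
  return total;
}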

clang/test/CIR/CodeGen/AArch64/neon-ldst.c (+30, -90)

Large diffs are not rendered by default.

clang/test/CIR/CodeGen/AArch64/neon-misc.c (+32)

@@ -20,6 +20,8 @@ uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 8>
 
 // LLVM: define dso_local <8 x i8> @test_vset_lane_u8(i8 [[A:%.*]], <8 x i8> [[B:%.*]])
+// LLVM: alloca <8 x i8>
+// LLVM: alloca i8
 // LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1
 // LLVM: [[B_ADR:%.*]] = alloca <8 x i8>, i64 1, align 8
 // LLVM: store i8 [[A]], ptr [[A_ADR]], align 1
@@ -42,6 +44,8 @@ uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 4>
 
 // LLVM: define dso_local <4 x i16> @test_vset_lane_u16(i16 [[A:%.*]], <4 x i16> [[B:%.*]])
+// LLVM: alloca <4 x i16>
+// LLVM: alloca i16
 // LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2
 // LLVM: [[B_ADR:%.*]] = alloca <4 x i16>, i64 1, align 8
 // LLVM: store i16 [[A]], ptr [[A_ADR]], align 2
@@ -64,6 +68,8 @@ uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 2>
 
 // LLVM: define dso_local <2 x i32> @test_vset_lane_u32(i32 [[A:%.*]], <2 x i32> [[B:%.*]])
+// LLVM: alloca <2 x i32>
+// LLVM: alloca i32
 // LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4
 // LLVM: [[B_ADR:%.*]] = alloca <2 x i32>, i64 1, align 8
 // LLVM: store i32 [[A]], ptr [[A_ADR]], align 4
@@ -87,6 +93,8 @@ int64x1_t test_vset_lane_u64(int64_t a, int64x1_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 1>
 
 // LLVM: define dso_local <1 x i64> @test_vset_lane_u64(i64 [[A:%.*]], <1 x i64> [[B:%.*]])
+// LLVM: alloca <1 x i64>
+// LLVM: alloca i64
 // LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8
 // LLVM: [[B_ADR:%.*]] = alloca <1 x i64>, i64 1, align 8
 // LLVM: store i64 [[A]], ptr [[A_ADR]], align 8
@@ -109,6 +117,8 @@ float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>
 
 // LLVM: define dso_local <2 x float> @test_vset_lane_f32(float [[A:%.*]], <2 x float> [[B:%.*]])
+// LLVM: alloca <2 x float>
+// LLVM: alloca float
 // LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4
 // LLVM: [[B_ADR:%.*]] = alloca <2 x float>, i64 1, align 8
 // LLVM: store float [[A]], ptr [[A_ADR]], align 4
@@ -131,6 +141,8 @@ uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 16>
 
 // LLVM: define dso_local <16 x i8> @test_vsetq_lane_u8(i8 [[A:%.*]], <16 x i8> [[B:%.*]])
+// LLVM: alloca <16 x i8>
+// LLVM: alloca i8
 // LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1
 // LLVM: [[B_ADR:%.*]] = alloca <16 x i8>, i64 1, align 16
 // LLVM: store i8 [[A]], ptr [[A_ADR]], align 1
@@ -153,6 +165,8 @@ uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 8>
 
 // LLVM: define dso_local <8 x i16> @test_vsetq_lane_u16(i16 [[A:%.*]], <8 x i16> [[B:%.*]])
+// LLVM: alloca <8 x i16>
+// LLVM: alloca i16
 // LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2
 // LLVM: [[B_ADR:%.*]] = alloca <8 x i16>, i64 1, align 16
 // LLVM: store i16 [[A]], ptr [[A_ADR]], align 2
@@ -175,6 +189,8 @@ uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 4>
 
 // LLVM: define dso_local <4 x i32> @test_vsetq_lane_u32(i32 [[A:%.*]], <4 x i32> [[B:%.*]])
+// LLVM: alloca <4 x i32>
+// LLVM: alloca i32
 // LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4
 // LLVM: [[B_ADR:%.*]] = alloca <4 x i32>, i64 1, align 16
 // LLVM: store i32 [[A]], ptr [[A_ADR]], align 4
@@ -197,6 +213,8 @@ int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 2>
 
 // LLVM: define dso_local <2 x i64> @test_vsetq_lane_s64(i64 [[A:%.*]], <2 x i64> [[B:%.*]])
+// LLVM: alloca <2 x i64>
+// LLVM: alloca i64
 // LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8
 // LLVM: [[B_ADR:%.*]] = alloca <2 x i64>, i64 1, align 16
 // LLVM: store i64 [[A]], ptr [[A_ADR]], align 8
@@ -219,6 +237,8 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
 // CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>
 
 // LLVM: define dso_local <4 x float> @test_vsetq_lane_f32(float [[A:%.*]], <4 x float> [[B:%.*]])
+// LLVM: alloca <4 x float>
+// LLVM: alloca float
 // LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4
 // LLVM: [[B_ADR:%.*]] = alloca <4 x float>, i64 1, align 16
 // LLVM: store float [[A]], ptr [[A_ADR]], align 4
@@ -241,6 +261,7 @@ uint8_t test_vget_lane_u8(uint8x8_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>
 
 // LLVM: define dso_local i8 @test_vget_lane_u8(<8 x i8> [[ARG:%.*]])
+// LLVM: alloca <8 x i8>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
 // LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
 // LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8
@@ -258,6 +279,7 @@ uint8_t test_vgetq_lane_u8(uint8x16_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>
 
 // LLVM: define dso_local i8 @test_vgetq_lane_u8(<16 x i8> [[ARG:%.*]])
+// LLVM: alloca <16 x i8>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
 // LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
 // LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16
@@ -275,6 +297,7 @@ uint16_t test_vget_lane_u16(uint16x4_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>
 
 // LLVM: define dso_local i16 @test_vget_lane_u16(<4 x i16> [[ARG:%.*]])
+// LLVM: alloca <4 x i16>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
 // LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
 // LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8
@@ -292,6 +315,7 @@ uint16_t test_vgetq_lane_u16(uint16x8_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>
 
 // LLVM: define dso_local i16 @test_vgetq_lane_u16(<8 x i16> [[ARG:%.*]])
+// LLVM: alloca <8 x i16>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
 // LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
 // LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16
@@ -309,6 +333,7 @@ uint32_t test_vget_lane_u32(uint32x2_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>
 
 // LLVM: define dso_local i32 @test_vget_lane_u32(<2 x i32> [[ARG:%.*]])
+// LLVM: alloca <2 x i32>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
 // LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
 // LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8
@@ -326,6 +351,7 @@ uint32_t test_vgetq_lane_u32(uint32x4_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>
 
 // LLVM: define dso_local i32 @test_vgetq_lane_u32(<4 x i32> [[ARG:%.*]])
+// LLVM: alloca <4 x i32>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
 // LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
 // LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16
@@ -343,6 +369,7 @@ uint64_t test_vget_lane_u64(uint64x1_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>
 
 // LLVM: define dso_local i64 @test_vget_lane_u64(<1 x i64> [[ARG:%.*]])
+// LLVM: alloca <1 x i64>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
 // LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
 // LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8
@@ -360,6 +387,7 @@ uint64_t test_vgetq_lane_u64(uint64x2_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>
 
 // LLVM: define dso_local i64 @test_vgetq_lane_u64(<2 x i64> [[ARG:%.*]])
+// LLVM: alloca <2 x i64>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
 // LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
 // LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16
@@ -377,6 +405,7 @@ float32_t test_vget_lane_f32(float32x2_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>
 
 // LLVM: define dso_local float @test_vget_lane_f32(<2 x float> [[ARG:%.*]])
+// LLVM: alloca <2 x float>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
 // LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
 // LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8
@@ -394,6 +423,7 @@ float64_t test_vget_lane_f64(float64x1_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>
 
 // LLVM: define dso_local double @test_vget_lane_f64(<1 x double> [[ARG:%.*]])
+// LLVM: alloca <1 x double>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
 // LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
 // LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8
@@ -411,6 +441,7 @@ float32_t test_vgetq_lane_f32(float32x4_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>
 
 // LLVM: define dso_local float @test_vgetq_lane_f32(<4 x float> [[ARG:%.*]])
+// LLVM: alloca <4 x float>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
 // LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
 // LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
@@ -428,6 +459,7 @@ float64_t test_vgetq_lane_f64(float64x2_t a) {
 // CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>
 
 // LLVM: define dso_local double @test_vgetq_lane_f64(<2 x double> [[ARG:%.*]])
+// LLVM: alloca <2 x double>
 // LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
 // LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
 // LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16

clang/test/CIR/CodeGen/AArch64/neon.c (+3, -9)

@@ -6008,9 +6008,7 @@ uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
 // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> {{%.*}} to <16 x i8>
 // LLVM: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // LLVM: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// LLVM: store <8 x i8> [[VQRSHRUN_N1]], ptr [[RET:%.*]], align 8
-// LLVM: [[RETVAL:%.*]] = load <8 x i8>, ptr [[RET]], align 8
-// LLVM: ret <8 x i8> [[RETVAL]]
+// LLVM: ret <8 x i8> [[VQRSHRUN_N1]]
 }
 
 uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
@@ -6025,9 +6023,7 @@ uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
 // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> {{%.*}} to <16 x i8>
 // LLVM: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // LLVM: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// LLVM: store <4 x i16> [[VQRSHRUN_N1]], ptr [[RET:%.*]], align 8
-// LLVM: [[RETVAL:%.*]] = load <4 x i16>, ptr [[RET]], align 8
-// LLVM: ret <4 x i16> [[RETVAL]]
+// LLVM: ret <4 x i16> [[VQRSHRUN_N1]]
 }
 
 uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
@@ -6042,9 +6038,7 @@ uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
 // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> {{%.*}} to <16 x i8>
 // LLVM: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // LLVM: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// LLVM: store <2 x i32> [[VQRSHRUN_N1]], ptr [[RET:%.*]], align 8
-// LLVM: [[RETVAL:%.*]] = load <2 x i32>, ptr [[RET]], align 8
-// LLVM: ret <2 x i32> [[RETVAL]]
+// LLVM: ret <2 x i32> [[VQRSHRUN_N1]]
 }
 
 // NYI-LABEL: @test_vqrshrun_high_n_s16(
