Commit a520b95

[CIR] [FlattenCFG] hoist allocas to entry block for funcOp in flattenCFG
1 parent: db6b7c0

20 files changed: +191 −43 lines

clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp

+36 −5

@@ -869,11 +869,41 @@ class CIRTernaryOpFlattening
  }
};

+// Hoist all static allocas to the entry block.
+class CIRAllocaOpFlattening : public mlir::OpRewritePattern<mlir::cir::AllocaOp> {
+public:
+  using OpRewritePattern<mlir::cir::AllocaOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(mlir::cir::AllocaOp allocaOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::cir::FuncOp funcOp = allocaOp->getParentOfType<mlir::cir::FuncOp>();
+    if (!funcOp || funcOp.getRegion().empty())
+      return mlir::failure();
+
+    mlir::Block &entryBlock = funcOp.getRegion().front();
+
+    if (allocaOp->getBlock() == &entryBlock)
+      return mlir::failure();
+
+    // Don't hoist allocas with a dynamic alloca size.
+    if (allocaOp.getDynAllocSize() != mlir::Value())
+      return mlir::failure();
+
+    // It is cheaper to call `mlir::Operation::moveBefore` than using the
+    // rewriter, so we prefer to move the op manually here.
+    mlir::Operation *insertPoint = &*entryBlock.begin();
+    allocaOp->moveBefore(insertPoint);
+
+    return mlir::success();
+  }
+};
+
void populateFlattenCFGPatterns(RewritePatternSet &patterns) {
-  patterns
-      .add<CIRIfFlattening, CIRLoopOpInterfaceFlattening, CIRScopeOpFlattening,
-           CIRSwitchOpFlattening, CIRTernaryOpFlattening, CIRTryOpFlattening>(
-          patterns.getContext());
+  patterns.add<CIRIfFlattening, CIRLoopOpInterfaceFlattening,
+               CIRScopeOpFlattening, CIRSwitchOpFlattening,
+               CIRTernaryOpFlattening, CIRTryOpFlattening, CIRAllocaOpFlattening>(
+      patterns.getContext());
}

void FlattenCFGPass::runOnOperation() {
@@ -883,7 +913,8 @@ void FlattenCFGPass::runOnOperation() {
  // Collect operations to apply patterns.
  SmallVector<Operation *, 16> ops;
  getOperation()->walk<mlir::WalkOrder::PostOrder>([&](Operation *op) {
-    if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp, TryOp>(op))
+    if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp, TryOp, AllocaOp>(
+            op))
      ops.push_back(op);
  });
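
For context, here is a minimal C sketch (hypothetical names, not part of this commit) of which allocas the new pattern affects once the CFG is flattened: fixed-size locals declared in nested blocks have their cir.alloca hoisted into the function's entry block, while allocas with a dynamic size are left in place.

// Hypothetical illustration only; CIRAllocaOpFlattening operates on the CIR,
// not on this C source directly.
void consume(int *p, char *q) { (void)p; (void)q; }

void example(int n) {
  if (n > 0) {
    int x = 42;       // fixed-size local: its cir.alloca moves to the entry block
    char buf[n];      // VLA, i.e. dynamic alloca size: deliberately not hoisted
    consume(&x, buf); // keeps the locals alive, like the consume() calls added to the tests below
  }
}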

clang/test/CIR/CodeGen/OpenCL/addrspace-alloca.cl

+4 −1

@@ -3,7 +3,7 @@
// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -triple spirv64-unknown-unknown %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM

-
+void consume(global char*);
// CIR: cir.func @func(%arg0: !cir.ptr<!s32i, addrspace(offload_local)>
// LLVM: @func(ptr addrspace(3)
kernel void func(local int *p) {
@@ -30,5 +30,8 @@ kernel void func(local int *p) {
// CIR-NEXT: cir.store %[[#ALLOCA_X]], %[[#ALLOCA_PTR]] : !cir.ptr<!s32i, addrspace(offload_private)>, !cir.ptr<!cir.ptr<!s32i, addrspace(offload_private)>, addrspace(offload_private)>
// LLVM-NEXT: store ptr %[[#ALLOCA_X]], ptr %[[#ALLOCA_PTR]]

+  // make sure the local variable won't be optimized out.
+  consume(b);
+
  return;
}

clang/test/CIR/CodeGen/aarch64-neon-vdup-lane.c

+12

@@ -19,6 +19,7 @@ int8_t test_vdupb_lane_s8(int8x8_t src) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>

// LLVM: define dso_local i8 @test_vdupb_lane_s8(<8 x i8> [[ARG:%.*]])
+// LLVM: alloca <8 x i8>
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8
@@ -36,6 +37,7 @@ int8_t test_vdupb_laneq_s8(int8x16_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>

// LLVM: define dso_local i8 @test_vdupb_laneq_s8(<16 x i8> [[ARG:%.*]])
+// LLVM: alloca <16 x i8>
// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16
@@ -54,6 +56,7 @@ int16_t test_vduph_lane_s16(int16x4_t src) {


// LLVM: define dso_local i16 @test_vduph_lane_s16(<4 x i16> [[ARG:%.*]])
+// LLVM: alloca <4 x i16>
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8
@@ -71,6 +74,7 @@ int16_t test_vduph_laneq_s16(int16x8_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>

// LLVM: define dso_local i16 @test_vduph_laneq_s16(<8 x i16> [[ARG:%.*]])
+// LLVM: alloca <8 x i16>
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16
@@ -88,6 +92,7 @@ int32_t test_vdups_lane_s32(int32x2_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>

// LLVM: define dso_local i32 @test_vdups_lane_s32(<2 x i32> [[ARG:%.*]])
+// LLVM: alloca <2 x i32>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8
@@ -105,6 +110,7 @@ int32_t test_vdups_laneq_s32(int32x4_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>

// LLVM: define dso_local i32 @test_vdups_laneq_s32(<4 x i32> [[ARG:%.*]])
+// LLVM: alloca <4 x i32>
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16
@@ -122,6 +128,7 @@ int64_t test_vdupd_lane_s64(int64x1_t src) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>

// LLVM: define dso_local i64 @test_vdupd_lane_s64(<1 x i64> [[ARG:%.*]])
+// LLVM: alloca <1 x i64>
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8
@@ -139,6 +146,7 @@ int64_t test_vdupd_laneq_s64(int64x2_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>

// LLVM: define dso_local i64 @test_vdupd_laneq_s64(<2 x i64> [[ARG:%.*]])
+// LLVM: alloca <2 x i64>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16
@@ -156,6 +164,7 @@ float32_t test_vdups_lane_f32(float32x2_t src) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local float @test_vdups_lane_f32(<2 x float> [[ARG:%.*]])
+// LLVM: alloca <2 x float>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8
@@ -173,6 +182,7 @@ float64_t test_vdupd_lane_f64(float64x1_t src) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>

// LLVM: define dso_local double @test_vdupd_lane_f64(<1 x double> [[ARG:%.*]])
+// LLVM: alloca <1 x double>
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8
@@ -190,6 +200,7 @@ float32_t test_vdups_laneq_f32(float32x4_t src) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>

// LLVM: define dso_local float @test_vdups_laneq_f32(<4 x float> [[ARG:%.*]])
+// LLVM: alloca <4 x float>
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
@@ -207,6 +218,7 @@ float64_t test_vdupd_laneq_f64(float64x2_t src) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>

// LLVM: define dso_local double @test_vdupd_laneq_f64(<2 x double> [[ARG:%.*]])
+// LLVM: alloca <2 x double>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16

clang/test/CIR/CodeGen/aarch64-neon-vget.c

+12

@@ -23,6 +23,7 @@ uint8_t test_vget_lane_u8(uint8x8_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>

// LLVM: define dso_local i8 @test_vget_lane_u8(<8 x i8> [[ARG:%.*]])
+// LLVM: alloca <8 x i8>
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8
@@ -40,6 +41,7 @@ uint8_t test_vgetq_lane_u8(uint8x16_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>

// LLVM: define dso_local i8 @test_vgetq_lane_u8(<16 x i8> [[ARG:%.*]])
+// LLVM: alloca <16 x i8>
// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16
@@ -57,6 +59,7 @@ uint16_t test_vget_lane_u16(uint16x4_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>

// LLVM: define dso_local i16 @test_vget_lane_u16(<4 x i16> [[ARG:%.*]])
+// LLVM: alloca <4 x i16>
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8
@@ -74,6 +77,7 @@ uint16_t test_vgetq_lane_u16(uint16x8_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>

// LLVM: define dso_local i16 @test_vgetq_lane_u16(<8 x i16> [[ARG:%.*]])
+// LLVM: alloca <8 x i16>
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16
@@ -91,6 +95,7 @@ uint32_t test_vget_lane_u32(uint32x2_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>

// LLVM: define dso_local i32 @test_vget_lane_u32(<2 x i32> [[ARG:%.*]])
+// LLVM: alloca <2 x i32>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8
@@ -108,6 +113,7 @@ uint32_t test_vgetq_lane_u32(uint32x4_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>

// LLVM: define dso_local i32 @test_vgetq_lane_u32(<4 x i32> [[ARG:%.*]])
+// LLVM: alloca <4 x i32>
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16
@@ -125,6 +131,7 @@ uint64_t test_vget_lane_u64(uint64x1_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>

// LLVM: define dso_local i64 @test_vget_lane_u64(<1 x i64> [[ARG:%.*]])
+// LLVM: alloca <1 x i64>
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8
@@ -142,6 +149,7 @@ uint64_t test_vgetq_lane_u64(uint64x2_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>

// LLVM: define dso_local i64 @test_vgetq_lane_u64(<2 x i64> [[ARG:%.*]])
+// LLVM: alloca <2 x i64>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16
@@ -159,6 +167,7 @@ float32_t test_vget_lane_f32(float32x2_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local float @test_vget_lane_f32(<2 x float> [[ARG:%.*]])
+// LLVM: alloca <2 x float>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8
@@ -176,6 +185,7 @@ float64_t test_vget_lane_f64(float64x1_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>

// LLVM: define dso_local double @test_vget_lane_f64(<1 x double> [[ARG:%.*]])
+// LLVM: alloca <1 x double>
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8
@@ -193,6 +203,7 @@ float32_t test_vgetq_lane_f32(float32x4_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>

// LLVM: define dso_local float @test_vgetq_lane_f32(<4 x float> [[ARG:%.*]])
+// LLVM: alloca <4 x float>
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
@@ -210,6 +221,7 @@ float64_t test_vgetq_lane_f64(float64x2_t a) {
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>

// LLVM: define dso_local double @test_vgetq_lane_f64(<2 x double> [[ARG:%.*]])
+// LLVM: alloca <2 x double>
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16

clang/test/CIR/CodeGen/aarch64-neon-vset.c

+20

@@ -25,6 +25,8 @@ uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 8>

// LLVM: define dso_local <8 x i8> @test_vset_lane_u8(i8 [[A:%.*]], <8 x i8> [[B:%.*]])
+// LLVM: alloca <8 x i8>
+// LLVM: alloca <8 x i8>
// LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1
// LLVM: [[B_ADR:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store i8 [[A]], ptr [[A_ADR]], align 1
@@ -47,6 +49,8 @@ uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 4>

// LLVM: define dso_local <4 x i16> @test_vset_lane_u16(i16 [[A:%.*]], <4 x i16> [[B:%.*]])
+// LLVM: alloca <4 x i16>
+// LLVM: alloca <4 x i16>
// LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2
// LLVM: [[B_ADR:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store i16 [[A]], ptr [[A_ADR]], align 2
@@ -69,6 +73,8 @@ uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 2>

// LLVM: define dso_local <2 x i32> @test_vset_lane_u32(i32 [[A:%.*]], <2 x i32> [[B:%.*]])
+// LLVM: alloca <2 x i32>
+// LLVM: alloca <2 x i32>
// LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4
// LLVM: [[B_ADR:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store i32 [[A]], ptr [[A_ADR]], align 4
@@ -92,6 +98,8 @@ int64x1_t test_vset_lane_u64(int64_t a, int64x1_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 1>

// LLVM: define dso_local <1 x i64> @test_vset_lane_u64(i64 [[A:%.*]], <1 x i64> [[B:%.*]])
+// LLVM: alloca <1 x i64>
+// LLVM: alloca <1 x i64>
// LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8
// LLVM: [[B_ADR:%.*]] = alloca <1 x i64>, i64 1, align 8
// LLVM: store i64 [[A]], ptr [[A_ADR]], align 8
@@ -114,6 +122,8 @@ float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local <2 x float> @test_vset_lane_f32(float [[A:%.*]], <2 x float> [[B:%.*]])
+// LLVM: alloca <2 x float>
+// LLVM: alloca <2 x float>
// LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4
// LLVM: [[B_ADR:%.*]] = alloca <2 x float>, i64 1, align 8
// LLVM: store float [[A]], ptr [[A_ADR]], align 4
@@ -136,6 +146,8 @@ uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 16>

// LLVM: define dso_local <16 x i8> @test_vsetq_lane_u8(i8 [[A:%.*]], <16 x i8> [[B:%.*]])
+// LLVM: alloca <16 x i8>
+// LLVM: alloca <16 x i8>
// LLVM: [[A_ADR:%.*]] = alloca i8, i64 1, align 1
// LLVM: [[B_ADR:%.*]] = alloca <16 x i8>, i64 1, align 16
// LLVM: store i8 [[A]], ptr [[A_ADR]], align 1
@@ -158,6 +170,8 @@ uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 8>

// LLVM: define dso_local <8 x i16> @test_vsetq_lane_u16(i16 [[A:%.*]], <8 x i16> [[B:%.*]])
+// LLVM: alloca <8 x i16>
+// LLVM: alloca <8 x i16>
// LLVM: [[A_ADR:%.*]] = alloca i16, i64 1, align 2
// LLVM: [[B_ADR:%.*]] = alloca <8 x i16>, i64 1, align 16
// LLVM: store i16 [[A]], ptr [[A_ADR]], align 2
@@ -180,6 +194,8 @@ uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 4>

// LLVM: define dso_local <4 x i32> @test_vsetq_lane_u32(i32 [[A:%.*]], <4 x i32> [[B:%.*]])
+// LLVM: alloca <4 x i32>
+// LLVM: alloca <4 x i32>
// LLVM: [[A_ADR:%.*]] = alloca i32, i64 1, align 4
// LLVM: [[B_ADR:%.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store i32 [[A]], ptr [[A_ADR]], align 4
@@ -202,6 +218,8 @@ int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 2>

// LLVM: define dso_local <2 x i64> @test_vsetq_lane_s64(i64 [[A:%.*]], <2 x i64> [[B:%.*]])
+// LLVM: alloca <2 x i64>
+// LLVM: alloca <2 x i64>
// LLVM: [[A_ADR:%.*]] = alloca i64, i64 1, align 8
// LLVM: [[B_ADR:%.*]] = alloca <2 x i64>, i64 1, align 16
// LLVM: store i64 [[A]], ptr [[A_ADR]], align 8
@@ -224,6 +242,8 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>

// LLVM: define dso_local <4 x float> @test_vsetq_lane_f32(float [[A:%.*]], <4 x float> [[B:%.*]])
+// LLVM: alloca <4 x float>
+// LLVM: alloca <4 x float>
// LLVM: [[A_ADR:%.*]] = alloca float, i64 1, align 4
// LLVM: [[B_ADR:%.*]] = alloca <4 x float>, i64 1, align 16
// LLVM: store float [[A]], ptr [[A_ADR]], align 4

clang/test/CIR/CodeGen/builtin-bit-cast.cpp

+2 −2

@@ -130,7 +130,7 @@ two_ints test_rvalue_aggregate() {
// CIR: }

// LLVM-LABEL: define dso_local %struct.two_ints @_Z21test_rvalue_aggregatev
-// LLVM: %[[#SRC_SLOT:]] = alloca i64, i64 1, align 8
-// LLVM-NEXT: store i64 42, ptr %[[#SRC_SLOT]], align 8
+// LLVM: %[[#SRC_SLOT:]] = alloca i64, i64 1, align 8
+// LLVM: store i64 42, ptr %[[#SRC_SLOT]], align 8
// LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %{{.+}}, ptr %[[#SRC_SLOT]], i64 8, i1 false)
// LLVM: }

clang/test/CIR/CodeGen/call-via-class-member-funcptr.cpp

+7 −2

@@ -14,7 +14,12 @@
  a g;
};
const char *f::b() { return g.b(h); }
-void fn1() { f f1; }
+// Make sure f1 won't be optimized out.
+void consume(void*);
+void fn1() {
+  f f1;
+  consume(&f1);
+}

// CIR: ty_a = !cir.struct<class "a" {!cir.int<u, 8>} #cir.record.decl.ast>
// CIR: ty_f = !cir.struct<class "f" {!cir.struct<class "a" {!cir.int<u, 8>} #cir.record.decl.ast>}>
@@ -23,7 +28,7 @@ void fn1() { f f1; }
// CIR: cir.func private @_ZN1a1bEi(!s32i) -> !cir.ptr<!s8i>

// CIR: cir.func @_ZN1f1bEv(%arg0: !cir.ptr<!ty_f> loc{{.*}}) -> !cir.ptr<!s8i>
-// CIR: [[H_PTR:%.*]] = cir.get_global @h : !cir.ptr<!s32i> loc(#loc18)
+// CIR: [[H_PTR:%.*]] = cir.get_global @h : !cir.ptr<!s32i>
// CIR: [[H_VAL:%.*]] = cir.load [[H_PTR]] : !cir.ptr<!s32i>, !s32i
// CIR: [[RET1_VAL:%.*]] = cir.call @_ZN1a1bEi([[H_VAL]]) : (!s32i) -> !cir.ptr<!s8i>
// CIR: cir.store [[RET1_VAL]], [[RET1_P:%.*]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
