Skip to content

Commit b663f00

Browse files
committed
get rid of unwanted temporary generated by EmitCall
1 parent b65b4ed commit b663f00

File tree

2 files changed

+11
-18
lines changed

2 files changed

+11
-18
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5324,6 +5324,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
53245324
IRCallArgs[FirstIRArg] = Val;
53255325
break;
53265326
}
5327+
} else if (I->getType()->isArrayParameterType()) {
5328+
// use the tmp created by the HLSLOutArgExpr
5329+
// instead of creating a new one below and copying the tmp into it.
5330+
IRCallArgs[FirstIRArg] = I->getKnownRValue().getScalarVal();
5331+
break;
53275332
}
53285333

53295334
// For non-aggregate args and aggregate args meeting conditions above

clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,9 @@ void increment(inout int Arr[2]) {
99
// CHECK-LABEL: arrayCall
1010
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
1111
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
12-
// CHECK-NEXT: [[Tmp2:%.*]] = alloca [2 x i32], align 4
1312
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
1413
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
15-
// CHECK-NEXT: store ptr [[Tmp]], ptr [[Tmp2]], align 4
16-
// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp2]]) #3
14+
// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3
1715
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
1816
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
1917
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -33,10 +31,8 @@ void fn2(out int Arr[2]) {
3331
// CHECK-LABEL: arrayCall2
3432
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
3533
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
36-
// CHECK-NEXT: [[Tmp2:%.*]] = alloca [2 x i32], align 4
3734
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
38-
// CHECK-NEXT: store ptr [[Tmp]], ptr [[Tmp2]], align 4
39-
// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp2]]) #3
35+
// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3
4036
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
4137
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
4238
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -58,11 +54,9 @@ void nestedCall(inout int Arr[2], uint index) {
5854
// CHECK-LABEL: arrayCall3
5955
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
6056
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
61-
// CHECK-NEXT: [[Tmp2:%.*]] = alloca [2 x i32], align 4
6257
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
6358
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
64-
// CHECK-NEXT: store ptr [[Tmp]], ptr [[Tmp2]], align 4
65-
// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp2]], i32 noundef 0) #3
59+
// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]], i32 noundef 0) #3
6660
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
6761
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 1
6862
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -75,10 +69,8 @@ export int arrayCall3() {
7569

7670
// CHECK-LABEL: outerCall
7771
// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4
78-
// CHECK-NEXT: [[Tmp2:%.*]] = alloca [2 x i32], align 4
7972
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 %{{.*}}, i32 8, i1 false)
80-
// CHECK-NEXT: store ptr [[Tmp]], ptr [[Tmp2]], align 4
81-
// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp2]]) #3
73+
// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3
8274
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 {{.*}}, ptr align 4 [[Tmp]], i32 8, i1 false)
8375
// CHECK-NEXT: ret void
8476
void outerCall(inout int Arr[2]) {
@@ -88,11 +80,9 @@ void outerCall(inout int Arr[2]) {
8880
// CHECK-LABEL: arrayCall4
8981
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
9082
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
91-
// CHECK-NEXT: [[Tmp2:%.*]] = alloca [2 x i32], align 4
9283
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
9384
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
94-
// CHECK-NEXT: store ptr [[Tmp]], ptr [[Tmp2]], align 4
95-
// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp2]]) #3
85+
// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3
9686
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
9787
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
9888
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -118,11 +108,9 @@ void outerCall2(inout int Arr[2]) {
118108
// CHECK-LABEL: arrayCall5
119109
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
120110
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
121-
// CHECK-NEXT: [[Tmp2:%.*]] = alloca [2 x i32], align 4
122111
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
123112
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
124-
// CHECK-NEXT: store ptr [[Tmp]], ptr [[Tmp2]], align 4
125-
// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp2]]) #3
113+
// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]]) #3
126114
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
127115
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
128116
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4

0 commit comments

Comments
 (0)