Skip to content

Commit fa0598a

Browse files
bruteforceboy authored and lanza committed
[CIR][ABI][AArch64][Lowering] Support calls for struct types > 128 bits (#1074)
As the title says, this PR adds support for calls with struct types > 128 bits, building upon this [PR](#1068). The approach is taken from the original CodeGen, and a couple of tests are included.
1 parent b3c2b94 commit fa0598a

File tree

3 files changed

+89
-1
lines changed

3 files changed

+89
-1
lines changed

clang/include/clang/CIR/MissingFeatures.h

+2
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,8 @@ struct MissingFeatures {
404404
static bool itaniumRecordLayoutBuilderFinishLayout() { return false; }
405405

406406
static bool mustProgress() { return false; }
407+
408+
static bool skipTempCopy() { return false; }
407409
};
408410

409411
} // namespace cir

clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerFunction.cpp

+50-1
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,14 @@ LowerFunction::buildFunctionEpilog(const LowerFunctionInfo &FI) {
704704
if (auto al = findAlloca(ret)) {
705705
rewriter.replaceAllUsesWith(al.getResult(), RVAddr);
706706
rewriter.eraseOp(al);
707+
rewriter.setInsertionPoint(ret);
708+
709+
auto retInputs = ret.getInput();
710+
assert(retInputs.size() == 1 && "return should only have one input");
711+
if (auto load = mlir::dyn_cast<LoadOp>(retInputs[0].getDefiningOp()))
712+
if (load.getResult().use_empty())
713+
rewriter.eraseOp(load);
714+
707715
rewriter.replaceOpWithNewOp<ReturnOp>(ret);
708716
}
709717
});
@@ -952,6 +960,15 @@ mlir::Value LowerFunction::rewriteCallOp(FuncType calleeTy, FuncOp origCallee,
952960
return CallResult;
953961
}
954962

963+
mlir::Value createAlloca(mlir::Location loc, mlir::Type type,
964+
LowerFunction &CGF) {
965+
auto align = CGF.LM.getDataLayout().getABITypeAlign(type);
966+
auto alignAttr = CGF.getRewriter().getI64IntegerAttr(align.value());
967+
return CGF.getRewriter().create<AllocaOp>(
968+
loc, CGF.getRewriter().getType<PointerType>(type), type,
969+
/*name=*/llvm::StringRef(""), alignAttr);
970+
}
971+
955972
// NOTE(cir): This method has partial parity to CodeGenFunction's EmitCall
956973
// method in CGCall.cpp. When incrementing it, use the original codegen as a
957974
// reference: add ABI-specific stuff and skip codegen stuff.
@@ -984,10 +1001,12 @@ mlir::Value LowerFunction::rewriteCallOp(const LowerFunctionInfo &CallInfo,
9841001
CIRToCIRArgMapping IRFunctionArgs(LM.getContext(), CallInfo);
9851002
llvm::SmallVector<mlir::Value, 16> IRCallArgs(IRFunctionArgs.totalIRArgs());
9861003

1004+
mlir::Value sRetPtr;
9871005
// If the call returns a temporary with struct return, create a temporary
9881006
// alloca to hold the result, unless one is given to us.
9891007
if (RetAI.isIndirect() || RetAI.isCoerceAndExpand() || RetAI.isInAlloca()) {
990-
cir_cconv_unreachable("NYI");
1008+
sRetPtr = createAlloca(loc, RetTy, *this);
1009+
IRCallArgs[IRFunctionArgs.getSRetArgNo()] = sRetPtr;
9911010
}
9921011

9931012
cir_cconv_assert(!cir::MissingFeatures::swift());
@@ -1082,6 +1101,32 @@ mlir::Value LowerFunction::rewriteCallOp(const LowerFunctionInfo &CallInfo,
10821101

10831102
break;
10841103
}
1104+
case ABIArgInfo::Indirect:
1105+
case ABIArgInfo::IndirectAliased: {
1106+
assert(NumIRArgs == 1);
1107+
// TODO(cir): For aggregate types
1108+
// We want to avoid creating an unnecessary temporary+copy here;
1109+
// however, we need one in three cases:
1110+
// 1. If the argument is not byval, and we are required to copy the
1111+
// source. (This case doesn't occur on any common architecture.)
1112+
// 2. If the argument is byval, RV is not sufficiently aligned, and
1113+
// we cannot force it to be sufficiently aligned.
1114+
// 3. If the argument is byval, but RV is not located in default
1115+
// or alloca address space.
1116+
cir_cconv_assert(!::cir::MissingFeatures::skipTempCopy());
1117+
1118+
mlir::Value alloca = findAlloca(I->getDefiningOp());
1119+
1120+
// since they are an ARM-specific feature.
1121+
if (::cir::MissingFeatures::undef())
1122+
cir_cconv_unreachable("NYI");
1123+
1124+
IRCallArgs[FirstIRArg] = alloca;
1125+
1126+
// NOTE(cir): Skipping Emissions, lifetime markers.
1127+
1128+
break;
1129+
}
10851130
default:
10861131
llvm::outs() << "Missing ABIArgInfo::Kind: " << ArgInfo.getKind() << "\n";
10871132
cir_cconv_unreachable("NYI");
@@ -1217,6 +1262,10 @@ mlir::Value LowerFunction::rewriteCallOp(const LowerFunctionInfo &CallInfo,
12171262
// done in CIRGen
12181263
return RetVal;
12191264
}
1265+
case ABIArgInfo::Indirect: {
1266+
auto load = rewriter.create<LoadOp>(loc, sRetPtr);
1267+
return load.getResult();
1268+
}
12201269
default:
12211270
llvm::errs() << "Unhandled ABIArgInfo kind: " << RetAI.getKind() << "\n";
12221271
cir_cconv_unreachable("NYI");

clang/test/CIR/CallConvLowering/AArch64/aarch64-cc-structs.c

+37
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,43 @@ void pass_eq_128(EQ_128 s) {}
153153
// LLVM: %[[#V2:]] = load ptr, ptr %[[#V1]], align 8
154154
void pass_gt_128(GT_128 s) {}
155155

156+
// CHECK: cir.func @get_gt_128(%arg0: !cir.ptr<!ty_GT_128_> {{.*}}, %arg1: !cir.ptr<!ty_GT_128_>
157+
// CHECK: %[[#V0:]] = cir.alloca !cir.ptr<!ty_GT_128_>, !cir.ptr<!cir.ptr<!ty_GT_128_>>, [""] {alignment = 8 : i64}
158+
// CHECK: cir.store %arg1, %[[#V0]] : !cir.ptr<!ty_GT_128_>, !cir.ptr<!cir.ptr<!ty_GT_128_>>
159+
// CHECK: %[[#V1:]] = cir.load %[[#V0]] : !cir.ptr<!cir.ptr<!ty_GT_128_>>, !cir.ptr<!ty_GT_128_>
160+
// CHECK: cir.copy %[[#V1]] to %arg0 : !cir.ptr<!ty_GT_128_>
161+
// CHECK: cir.return
162+
163+
// LLVM: void @get_gt_128(ptr %[[#V0:]], ptr %[[#V1:]])
164+
// LLVM: %[[#V3:]] = alloca ptr, i64 1, align 8
165+
// LLVM: store ptr %[[#V1]], ptr %[[#V3]], align 8
166+
// LLVM: %[[#V4:]] = load ptr, ptr %[[#V3]], align 8
167+
// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V0]], ptr %[[#V4]], i32 24, i1 false)
168+
// LLVM: ret void
169+
GT_128 get_gt_128(GT_128 s) {
170+
return s;
171+
}
172+
173+
// CHECK: cir.func no_proto @call_and_get_gt_128(%arg0: !cir.ptr<!ty_GT_128_>
174+
// CHECK: %[[#V0:]] = cir.alloca !ty_GT_128_, !cir.ptr<!ty_GT_128_>, {{.*}} {alignment = 8 : i64}
175+
// CHECK: %[[#V1:]] = cir.alloca !ty_GT_128_, !cir.ptr<!ty_GT_128_>, {{.*}} {alignment = 8 : i64}
176+
// CHECK: cir.call @get_gt_128(%[[#V1]], %arg0) : (!cir.ptr<!ty_GT_128_>, !cir.ptr<!ty_GT_128_>) -> ()
177+
// CHECK: %[[#V2:]] = cir.load %[[#V1]] : !cir.ptr<!ty_GT_128_>, !ty_GT_128_
178+
// CHECK: cir.store %[[#V2]], %[[#V0]] : !ty_GT_128_, !cir.ptr<!ty_GT_128_>
179+
// CHECK: cir.return
180+
181+
// LLVM: void @call_and_get_gt_128(ptr %[[#V0:]])
182+
// LLVM: %[[#V2:]] = alloca %struct.GT_128, i64 1, align 8
183+
// LLVM: %[[#V3:]] = alloca %struct.GT_128, i64 1, align 8
184+
// LLVM: call void @get_gt_128(ptr %[[#V3]], ptr %[[#V0]])
185+
// LLVM: %[[#V4:]] = load %struct.GT_128, ptr %[[#V3]], align 8
186+
// LLVM: store %struct.GT_128 %[[#V4]], ptr %[[#V2]], align 8
187+
// LLVM: ret void
188+
GT_128 call_and_get_gt_128() {
189+
GT_128 s;
190+
s = get_gt_128(s);
191+
return s;
192+
}
156193
// CHECK: cir.func @passS(%arg0: !cir.array<!u64i x 2>
157194
// CHECK: %[[#V0:]] = cir.alloca !ty_S, !cir.ptr<!ty_S>, [""] {alignment = 4 : i64}
158195
// CHECK: %[[#V1:]] = cir.alloca !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>, ["tmp"] {alignment = 8 : i64}

0 commit comments

Comments
 (0)