clang: Allow targets to set custom metadata on atomics #96906
Conversation
Use this to replace the emission of the amdgpu-unsafe-fp-atomics attribute in favor of per-instruction metadata. In the future, new fine-grained controls should be introduced that also cover the integer cases. Add a wrapper around CreateAtomicRMW that appends the metadata, and update a few use contexts to use it.
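At the call sites, the change is mechanical: code that previously built the instruction through the IRBuilder now goes through the new CodeGenFunction wrapper so the target hook can run. A minimal sketch of the call-site pattern (names taken from the diff below; surrounding declarations elided):

    // Before: build the instruction directly; no target hook is consulted.
    llvm::AtomicRMWInst *RMWI =
        CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);

    // After: the wrapper builds the same instruction, then lets the target
    // attach per-instruction metadata via setTargetAtomicMetadata().
    llvm::AtomicRMWInst *RMWI =
        CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);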
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes: Use this to replace the emission of the amdgpu-unsafe-fp-atomics attribute in favor of per-instruction metadata. Add a wrapper around CreateAtomicRMW that appends the metadata, and update a few use contexts to use it.

Patch is 38.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96906.diff

9 Files Affected:
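Since the default hook in TargetCodeGenInfo is a no-op, other targets can opt in by overriding setTargetAtomicMetadata in their own TargetCodeGenInfo subclass. A minimal sketch, assuming a hypothetical MyTargetCodeGenInfo and an invented metadata name, neither of which is part of this patch:

    // Hypothetical override: tag every floating-point atomicrmw with an
    // empty MDNode. "mytarget.relaxed.fp.atomics" is an illustrative name.
    void MyTargetCodeGenInfo::setTargetAtomicMetadata(
        CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
      if (llvm::AtomicRMWInst::isFPOperation(RMW.getOperation())) {
        llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
        RMW.setMetadata("mytarget.relaxed.fp.atomics", Empty);
      }
    }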
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbf942d06ca6e..fbe9569e50ef6 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -727,7 +727,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
- CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
+ CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
RMWI->setVolatile(E->isVolatile());
// For __atomic_*_fetch operations, perform the operation again to
@@ -2034,6 +2034,17 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
IsWeak);
}
+llvm::AtomicRMWInst *
+CodeGenFunction::emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr,
+ llvm::Value *Val, llvm::AtomicOrdering Order,
+ llvm::SyncScope::ID SSID) {
+
+ llvm::AtomicRMWInst *RMW =
+ Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
+ getTargetHooks().setTargetAtomicMetadata(*this, *RMW);
+ return RMW;
+}
+
void CodeGenFunction::EmitAtomicUpdate(
LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f40f3c273206b..8eb7a64c144c8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2835,9 +2835,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
llvm::Value *amt = llvm::ConstantFP::get(
VMContext, llvm::APFloat(static_cast<float>(1.0)));
- llvm::Value *old =
- Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
- llvm::AtomicOrdering::SequentiallyConsistent);
+ llvm::AtomicRMWInst *old =
+ CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
value = EmitLoadOfLValue(LV, E->getExprLoc());
@@ -3577,9 +3578,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
EmitScalarConversion(OpInfo.RHS, E->getRHS()->getType(), LHSTy,
E->getExprLoc()),
LHSTy);
- Value *OldVal = Builder.CreateAtomicRMW(
- AtomicOp, LHSLV.getAddress(), Amt,
- llvm::AtomicOrdering::SequentiallyConsistent);
+
+ llvm::AtomicRMWInst *OldVal =
+ CGF.emitAtomicRMWInst(AtomicOp, LHSLV.getAddress(), Amt);
// Since operation is atomic, the result type is guaranteed to be the
// same as the input in LLVM terms.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..8c152fef73557 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6235,8 +6235,8 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
X.getAddress().getElementType());
}
- llvm::Value *Res =
- CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+ llvm::AtomicRMWInst *Res =
+ CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
return std::make_pair(true, RValue::get(Res));
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 13f12b5d878a6..6cfcb76eea42a 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4153,6 +4153,13 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
+ /// Emit an atomicrmw instruction, applying relevant metadata when
+ /// applicable.
+ llvm::AtomicRMWInst *emitAtomicRMWInst(
+ llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+ llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::SyncScope::ID SSID = llvm::SyncScope::System);
+
void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index f242d9e36ed40..1bd821e7206b9 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -333,6 +333,10 @@ class TargetCodeGenInfo {
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const;
+ /// Allow the target to apply other metadata to an atomic instruction.
+ virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
+ llvm::AtomicRMWInst &RMW) const {}
+
/// Interface class for filling custom fields of a block literal for OpenCL.
class TargetOpenCLBlockHelper {
public:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4d3275e17c386..37e6af3d4196a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -311,6 +311,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
+ void setTargetAtomicMetadata(CodeGenFunction &CGF,
+ llvm::AtomicRMWInst &RMW) const override;
llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Type *BlockTy) const override;
@@ -546,6 +548,23 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
return Ctx.getOrInsertSyncScopeID(Name);
}
+void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
+ CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
+ if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
+ return;
+
+ // TODO: Introduce new, more controlled options that also work for integers,
+ // and deprecate allowAMDGPUUnsafeFPAtomics.
+ llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation();
+ if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
+ llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
+ RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
+
+ if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy())
+ RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
+ }
+}
+
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
new file mode 100644
index 0000000000000..6deff1116e1d8
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -0,0 +1,316 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,SAFE %s
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -munsafe-fp-atomics -o - %s | FileCheck -check-prefixes=CHECK,UNSAFE %s
+
+// SAFE-LABEL: define dso_local float @test_float_post_inc(
+// SAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+ static _Atomic float n;
+ return n++;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: ret float [[TMP0]]
+//
+float test_float_post_dc()
+{
+ static _Atomic float n;
+ return n--;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_pre_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: ret float [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_pre_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_dc()
+{
+ static _Atomic float n;
+ return --n;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_pre_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: ret float [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_pre_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_inc()
+{
+ static _Atomic float n;
+ return ++n;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_post_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP1]]
+//
+double test_double_post_inc()
+{
+ static _Atomic double n;
+ return n++;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_post_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP1]]
+//
+double test_double_post_dc()
+{
+ static _Atomic double n;
+ return n--;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_pre_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP2]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_pre_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP2]]
+//
+double test_double_pre_dc()
+{
+ static _Atomic double n;
+ return --n;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_pre_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP2]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_pre_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP2]]
+//
+double test_double_pre_inc()
+{
+ static _Atomic double n;
+ return ++n;
+}
+
+// SAFE-LABEL: define dso_local half @test__Float16_post_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
+// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
+// SAFE-NEXT: ret half [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
+// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
+// UNSAFE-NEXT: ret half [[TMP1]]
+//
+_Float16 test__Float16_post_inc()
+{
+ static _Atomic _Float16 n;
+ return n++;
+}
+
+// SAFE-LABEL: define dso_local half @test__Float16_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2...
[truncated]
ping
LGTM