Skip to content

[SYCLLowerIR] Remove !amdgcn.annotations metadata #14713

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 0 additions & 39 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,12 +317,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
bool shouldEmitStaticExternCAliases() const override;
bool shouldEmitDWARFBitFieldSeparators() const override;
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;

private:
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the amdgcn.annotations MDNode.
static void addAMDGCNMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
};
}

Expand Down Expand Up @@ -404,33 +398,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
}
}

/// Helper function for AMDGCN and NVVM targets, adds a NamedMDNode with GV,
/// Name, and Operand as operands, and adds the resulting MDNode to the
/// AnnotationName MDNode.
static void addAMDGCOrNVVMMetadata(const char *AnnotationName,
llvm::GlobalValue *GV, StringRef Name,
int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();

// Get annotations metadata node.
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(AnnotationName);

llvm::Metadata *MDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Append metadata to annotations node.
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}


void AMDGPUTargetCodeGenInfo::addAMDGCNMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
addAMDGCOrNVVMMetadata("amdgcn.annotations", GV, Name, Operand);
}


/// Emits control constants used to change per-architecture behaviour in the
/// AMDGPU ROCm device libraries.
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
Expand Down Expand Up @@ -483,12 +450,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
if (FD)
setFunctionDeclAttributes(FD, F, M);

// Create !{<func-ref>, metadata !"kernel", i32 1} node for SYCL kernels.
const bool IsSYCLKernel =
FD && M.getLangOpts().SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>();
if (IsSYCLKernel)
addAMDGCNMetadata(F, "kernel", 1);

if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

Expand Down
5 changes: 1 addition & 4 deletions clang/test/CodeGenSYCL/kernel-annotation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,12 @@ class Functor {
};

// CHECK-SPIR-NOT: annotations =
// CHECK-AMDGCN-NOT: annotations =

// CHECK-NVPTX: nvvm.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
// CHECK-NVPTX: [[FIRST]] = !{ptr @_ZTS7Functor, !"kernel", i32 1}
// CHECK-NVPTX: [[SECOND]] = !{ptr @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E5foo_2, !"kernel", i32 1}

// CHECK-AMDGCN: amdgcn.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
// CHECK-AMDGCN: [[FIRST]] = !{ptr @_ZTS7Functor, !"kernel", i32 1}
// CHECK-AMDGCN: [[SECOND]] = !{ptr @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E5foo_2, !"kernel", i32 1}

int main() {
sycl::queue q;
q.submit([&](sycl::handler &cgh) {
Expand Down
27 changes: 0 additions & 27 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15847,33 +15847,6 @@ track the usage for each kernel. However, in some cases careful organization of
the kernels and functions in the source file means there is minimal additional
effort required to accurately calculate GPR usage.

SYCL Kernel Metadata
====================

This section describes the additional metadata that is inserted for SYCL
kernels. As SYCL is a single source programming model functions can either
execute on a host or a device (i.e. GPU). Device kernels are akin to kernel
entry-points in GPU program. To mark an LLVM IR function as a device kernel
function, we make use of special LLVM metadata. The AMDGCN back-end will look
for a named metadata node called ``amdgcn.annotations``. This named metadata
must contain a list of metadata that describe the kernel IR. For our purposes,
we need to declare a metadata node that assigns the `"kernel"` attribute to the
LLVM IR function that should be emitted as a SYCL kernel function. These
metadata nodes take the form:

.. code-block:: text

!{<function ref>, metadata !"kernel", i32 1}

Consider the metadata generated by global-offset pass, showing a void kernel
function `example_kernel_with_offset` taking one argument, a pointer to 3 i32
integers:

.. code-block:: llvm

!amdgcn.annotations = !{!0}
!0 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}

Additional Documentation
========================

Expand Down
25 changes: 4 additions & 21 deletions llvm/include/llvm/SYCLLowerIR/GlobalOffset.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ class PassRegistry;
/// with an offset parameter which will be threaded through from the kernel
/// entry point.
class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
private:
using KernelPayload = TargetHelpers::KernelPayload;
using ArchType = TargetHelpers::ArchType;

public:
explicit GlobalOffsetPass() {}

Expand All @@ -41,7 +37,8 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
/// appended to the name.
///
/// \param Func Kernel to be processed.
void processKernelEntryPoint(Function *Func);
void processKernelEntryPoint(Function *Func,
TargetHelpers::KernelCache &KCache);

/// For a function containing a call instruction to the implicit offset
/// intrinsic, or another function which eventually calls the intrinsic,
Expand All @@ -65,7 +62,8 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
/// to have the implicit parameter added to it or be replaced with the
/// implicit parameter.
void addImplicitParameterToCallers(Module &M, Value *Callee,
Function *CalleeWithImplicitParam);
Function *CalleeWithImplicitParam,
TargetHelpers::KernelCache &KCache);

/// For a given function `Func` create a clone and extend its signature to
/// contain an implicit offset argument.
Expand All @@ -89,18 +87,6 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
Type *ImplicitArgumentType = nullptr,
bool KeepOriginal = false, bool IsKernel = false);

/// Create a mapping of kernel entry points to their metadata nodes. While
/// iterating over kernels make sure that a given kernel entry point has no
/// llvm uses.
///
/// \param KernelPayloads A collection of kernel functions present in a
/// module `M`.
///
/// \returns A map of kernel functions to corresponding metadata nodes.
DenseMap<Function *, MDNode *>
generateKernelMDNodeMap(Module &M,
SmallVectorImpl<KernelPayload> &KernelPayloads);

private:
/// Keep track of all cloned offset functions to avoid processing them.
llvm::SmallPtrSet<Function *, 8> Clones;
Expand All @@ -109,14 +95,11 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
/// Keep track of which non-offset functions have been processed to avoid
/// processing twice.
llvm::DenseMap<Function *, Value *> ProcessedFunctions;
/// Keep a map of all entry point functions with metadata.
llvm::DenseMap<Function *, MDNode *> EntryPointMetadata;
/// A type of implicit argument added to the kernel signature.
llvm::Type *KernelImplicitArgumentType = nullptr;
/// A type used for the alloca holding the values of global offsets.
llvm::Type *ImplicitOffsetPtrType = nullptr;

ArchType AT;
unsigned TargetAS = 0;
};

Expand Down
10 changes: 0 additions & 10 deletions llvm/include/llvm/SYCLLowerIR/LocalAccessorToSharedMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ class PassRegistry;
/// functions.
class LocalAccessorToSharedMemoryPass
: public PassInfoMixin<LocalAccessorToSharedMemoryPass> {
private:
using KernelPayload = TargetHelpers::KernelPayload;
using ArchType = TargetHelpers::ArchType;

public:
explicit LocalAccessorToSharedMemoryPass() {}

Expand All @@ -49,12 +45,6 @@ class LocalAccessorToSharedMemoryPass
/// \returns A new function with global symbol accesses.
Function *processKernel(Module &M, Function *F);

/// Update kernel metadata to reflect the change in the signature.
///
/// \param A map of original kernels to the modified ones.
void postProcessKernels(
SmallVectorImpl<std::pair<Function *, KernelPayload>> &NewToOldKernels);

private:
/// The value for NVVM's ADDRESS_SPACE_SHARED and AMD's LOCAL_ADDRESS happen
/// to be 3.
Expand Down
55 changes: 44 additions & 11 deletions llvm/include/llvm/SYCLLowerIR/TargetHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,54 @@ using namespace llvm;
namespace llvm {
namespace TargetHelpers {

enum class ArchType { Cuda, AMDHSA, Unsupported };
struct KernelCache {
void populateKernels(Module &M);

struct KernelPayload {
KernelPayload(Function *Kernel, MDNode *MD = nullptr);
Function *Kernel;
MDNode *MD;
SmallVector<MDNode *> DependentMDs;
};
bool isKernel(Function &F) const;

/// Updates cached data with a function intended as a replacement of an
/// existing function.
void handleReplacedWith(Function &OldF, Function &NewF);

/// Updates cached data with a new clone of an existing function.
/// The KernelOnly parameter updates cached data with only the information
/// required to identify the new function as a kernel.
void handleNewCloneOf(Function &OldF, Function &NewF, bool KernelOnly);

private:
/// Extra data about a kernel function. Only applicable to NVPTX kernels,
/// which have associated annotation metadata.
struct KernelPayload {
explicit KernelPayload() = default;
KernelPayload(NamedMDNode *ModuleAnnotationsMD);

bool hasAnnotations() const { return ModuleAnnotationsMD != nullptr; }

ArchType getArchType(const Module &M);
/// ModuleAnnotationsMD - metadata conntaining the unique global list of
/// annotations.
NamedMDNode *ModuleAnnotationsMD = nullptr;
SmallVector<MDNode *> DependentMDs;
};

std::string getAnnotationString(ArchType AT);
/// List of kernels in original Module order
SmallVector<Function *, 4> Kernels;
/// Map of kernels to extra data. Also serves as a quick kernel query.
SmallDenseMap<Function *, KernelPayload> KernelData;

public:
using iterator = decltype(Kernels)::iterator;
using const_iterator = decltype(Kernels)::const_iterator;

iterator begin() { return Kernels.begin(); }
iterator end() { return Kernels.end(); }

const_iterator begin() const { return Kernels.begin(); }
const_iterator end() const { return Kernels.end(); }

bool empty() const { return Kernels.empty(); }
};

void populateKernels(Module &M, SmallVectorImpl<KernelPayload> &Kernels,
TargetHelpers::ArchType AT);
bool isSYCLDevice(const Module &M);

} // end namespace TargetHelpers
} // end namespace llvm
Expand Down
Loading
Loading