intel · bader · Jul 29, 2024 · Jul 23, 2024 · Jul 23, 2024 · Jul 23, 2024
@@ -317,12 +317,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
   bool shouldEmitStaticExternCAliases() const override;
   bool shouldEmitDWARFBitFieldSeparators() const override;
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
-
-private:
-  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
-  // resulting MDNode to the amdgcn.annotations MDNode.
-  static void addAMDGCNMetadata(llvm::GlobalValue *GV, StringRef Name,
-                                int Operand);
 };
 }
 
@@ -404,33 +398,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
   }
 }
 
-/// Helper function for AMDGCN and NVVM targets, adds a NamedMDNode with GV,
-/// Name, and Operand as operands, and adds the resulting MDNode to the
-/// AnnotationName MDNode.
-static void addAMDGCOrNVVMMetadata(const char *AnnotationName,
-                                   llvm::GlobalValue *GV, StringRef Name,
-                                   int Operand) {
-  llvm::Module *M = GV->getParent();
-  llvm::LLVMContext &Ctx = M->getContext();
-
-  // Get annotations metadata node.
-  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(AnnotationName);
-
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
-  // Append metadata to annotations node.
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-}
-
-
-void AMDGPUTargetCodeGenInfo::addAMDGCNMetadata(llvm::GlobalValue *GV,
-                                                StringRef Name, int Operand) {
-  addAMDGCOrNVVMMetadata("amdgcn.annotations", GV, Name, Operand);
-}
-
-
 /// Emits control constants used to change per-architecture behaviour in the
 /// AMDGPU ROCm device libraries.
 void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
@@ -483,12 +450,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   if (FD)
     setFunctionDeclAttributes(FD, F, M);
 
-  // Create !{<func-ref>, metadata !"kernel", i32 1} node for SYCL kernels.
-  const bool IsSYCLKernel =
-      FD && M.getLangOpts().SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>();
-  if (IsSYCLKernel)
-    addAMDGCNMetadata(F, "kernel", 1);
-
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
 

@@ -23,15 +23,12 @@ class Functor {
 };
 
 // CHECK-SPIR-NOT: annotations =
+// CHECK-AMDGCN-NOT: annotations =
 
 // CHECK-NVPTX: nvvm.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
 // CHECK-NVPTX: [[FIRST]] = !{ptr @_ZTS7Functor, !"kernel", i32 1}
 // CHECK-NVPTX: [[SECOND]] = !{ptr @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
 
-// CHECK-AMDGCN: amdgcn.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
-// CHECK-AMDGCN: [[FIRST]] = !{ptr @_ZTS7Functor, !"kernel", i32 1}
-// CHECK-AMDGCN: [[SECOND]] = !{ptr @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
-
 int main() {
   sycl::queue q;
   q.submit([&](sycl::handler &cgh) {

@@ -15847,33 +15847,6 @@ track the usage for each kernel. However, in some cases careful organization of
 the kernels and functions in the source file means there is minimal additional
 effort required to accurately calculate GPR usage.
 
-SYCL Kernel Metadata
-====================
-
-This section describes the additional metadata that is inserted for SYCL
-kernels. As SYCL is a single source programming model functions can either
-execute on a host or a device (i.e. GPU). Device kernels are akin to kernel
-entry-points in GPU program. To mark an LLVM IR function as a device kernel
-function, we make use of special LLVM metadata. The AMDGCN back-end will look
-for a named metadata node called ``amdgcn.annotations``. This named metadata
-must contain a list of metadata that describe the kernel IR. For our purposes,
-we need to declare a metadata node that assigns the `"kernel"` attribute to the
-LLVM IR function that should be emitted as a SYCL kernel function. These
-metadata nodes take the form:
-
-.. code-block:: text
-
-  !{<function ref>, metadata !"kernel", i32 1}
-
-Consider the metadata generated by global-offset pass, showing a void kernel
-function `example_kernel_with_offset` taking one argument, a pointer to 3 i32
-integers:
-
-.. code-block:: llvm
-
-  !amdgcn.annotations = !{!0}
-  !0 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
-
 Additional Documentation
 ========================
 

@@ -24,10 +24,6 @@ class PassRegistry;
 /// with an offset parameter which will be threaded through from the kernel
 /// entry point.
 class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
-private:
-  using KernelPayload = TargetHelpers::KernelPayload;
-  using ArchType = TargetHelpers::ArchType;
-
 public:
   explicit GlobalOffsetPass() {}
 
@@ -41,7 +37,8 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
   /// appended to the name.
   ///
   /// \param Func Kernel to be processed.
-  void processKernelEntryPoint(Function *Func);
+  void processKernelEntryPoint(Function *Func,
+                               TargetHelpers::KernelCache &KCache);
 
   /// For a function containing a call instruction to the implicit offset
   /// intrinsic, or another function which eventually calls the intrinsic,
@@ -65,7 +62,8 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
   /// to have the implicit parameter added to it or be replaced with the
   /// implicit  parameter.
   void addImplicitParameterToCallers(Module &M, Value *Callee,
-                                     Function *CalleeWithImplicitParam);
+                                     Function *CalleeWithImplicitParam,
+                                     TargetHelpers::KernelCache &KCache);
 
   /// For a given function `Func` create a clone and extend its signature to
   /// contain an implicit offset argument.
@@ -89,18 +87,6 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
                               Type *ImplicitArgumentType = nullptr,
                               bool KeepOriginal = false, bool IsKernel = false);
 
-  /// Create a mapping of kernel entry points to their metadata nodes. While
-  /// iterating over kernels make sure that a given kernel entry point has no
-  /// llvm uses.
-  ///
-  /// \param KernelPayloads A collection of kernel functions present in a
-  /// module `M`.
-  ///
-  /// \returns A map of kernel functions to corresponding metadata nodes.
-  DenseMap<Function *, MDNode *>
-  generateKernelMDNodeMap(Module &M,
-                          SmallVectorImpl<KernelPayload> &KernelPayloads);
-
 private:
   /// Keep track of all cloned offset functions to avoid processing them.
   llvm::SmallPtrSet<Function *, 8> Clones;
@@ -109,14 +95,11 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
   /// Keep track of which non-offset functions have been processed to avoid
   /// processing twice.
   llvm::DenseMap<Function *, Value *> ProcessedFunctions;
-  /// Keep a map of all entry point functions with metadata.
-  llvm::DenseMap<Function *, MDNode *> EntryPointMetadata;
   /// A type of implicit argument added to the kernel signature.
   llvm::Type *KernelImplicitArgumentType = nullptr;
   /// A type used for the alloca holding the values of global offsets.
   llvm::Type *ImplicitOffsetPtrType = nullptr;
 
-  ArchType AT;
   unsigned TargetAS = 0;
 };
 

@@ -25,10 +25,6 @@ class PassRegistry;
 /// functions.
 class LocalAccessorToSharedMemoryPass
     : public PassInfoMixin<LocalAccessorToSharedMemoryPass> {
-private:
-  using KernelPayload = TargetHelpers::KernelPayload;
-  using ArchType = TargetHelpers::ArchType;
-
 public:
   explicit LocalAccessorToSharedMemoryPass() {}
 
@@ -49,12 +45,6 @@ class LocalAccessorToSharedMemoryPass
   /// \returns A new function with global symbol accesses.
   Function *processKernel(Module &M, Function *F);
 
-  /// Update kernel metadata to reflect the change in the signature.
-  ///
-  /// \param A map of original kernels to the modified ones.
-  void postProcessKernels(
-      SmallVectorImpl<std::pair<Function *, KernelPayload>> &NewToOldKernels);
-
 private:
   /// The value for NVVM's ADDRESS_SPACE_SHARED and AMD's LOCAL_ADDRESS happen
   /// to be 3.

@@ -22,21 +22,54 @@ using namespace llvm;
 namespace llvm {
 namespace TargetHelpers {
 
-enum class ArchType { Cuda, AMDHSA, Unsupported };
+struct KernelCache {
+  void populateKernels(Module &M);
 
-struct KernelPayload {
-  KernelPayload(Function *Kernel, MDNode *MD = nullptr);
-  Function *Kernel;
-  MDNode *MD;
-  SmallVector<MDNode *> DependentMDs;
-};
+  bool isKernel(Function &F) const;
+
+  /// Updates cached data with a function intended as a replacement of an
+  /// existing function.
+  void handleReplacedWith(Function &OldF, Function &NewF);
+
+  /// Updates cached data with a new clone of an existing function.
+  /// The KernelOnly parameter updates cached data with only the information
+  /// required to identify the new function as a kernel.
+  void handleNewCloneOf(Function &OldF, Function &NewF, bool KernelOnly);
+
+private:
+  /// Extra data about a kernel function. Only applicable to NVPTX kernels,
+  /// which have associated annotation metadata.
+  struct KernelPayload {
+    explicit KernelPayload() = default;
+    KernelPayload(NamedMDNode *ModuleAnnotationsMD);
+
+    bool hasAnnotations() const { return ModuleAnnotationsMD != nullptr; }
 
-ArchType getArchType(const Module &M);
+    /// ModuleAnnotationsMD - metadata conntaining the unique global list of
+    /// annotations.
+    NamedMDNode *ModuleAnnotationsMD = nullptr;
+    SmallVector<MDNode *> DependentMDs;
+  };
 
-std::string getAnnotationString(ArchType AT);
+  /// List of kernels in original Module order
+  SmallVector<Function *, 4> Kernels;
+  /// Map of kernels to extra data. Also serves as a quick kernel query.
+  SmallDenseMap<Function *, KernelPayload> KernelData;
+
+public:
+  using iterator = decltype(Kernels)::iterator;
+  using const_iterator = decltype(Kernels)::const_iterator;
+
+  iterator begin() { return Kernels.begin(); }
+  iterator end() { return Kernels.end(); }
+
+  const_iterator begin() const { return Kernels.begin(); }
+  const_iterator end() const { return Kernels.end(); }
+
+  bool empty() const { return Kernels.empty(); }
+};
 
-void populateKernels(Module &M, SmallVectorImpl<KernelPayload> &Kernels,
-                     TargetHelpers::ArchType AT);
+bool isSYCLDevice(const Module &M);
 
 } // end namespace TargetHelpers
 } // end namespace llvm