intel
diff --git a/‎clang/include/clang/Basic/BuiltinsAMDGPU.def
Lines changed: 5 additions & 0 deletions b/‎clang/include/clang/Basic/BuiltinsAMDGPU.def
Lines changed: 5 additions & 0 deletions
diff --git a/‎clang/lib/CodeGen/TargetInfo.cpp
Lines changed: 38 additions & 13 deletions b/‎clang/lib/CodeGen/TargetInfo.cpp
Lines changed: 38 additions & 13 deletions
diff --git a/‎clang/lib/Driver/ToolChains/Clang.cpp
Lines changed: 0 additions & 6 deletions b/‎clang/lib/Driver/ToolChains/Clang.cpp
Lines changed: 0 additions & 6 deletions
diff --git a/‎clang/lib/Driver/ToolChains/HIPAMD.cpp
Lines changed: 2 additions & 6 deletions b/‎clang/lib/Driver/ToolChains/HIPAMD.cpp
Lines changed: 2 additions & 6 deletions
diff --git a/‎clang/test/CodeGenSYCL/kernel-annotation.cpp
Lines changed: 48 additions & 0 deletions b/‎clang/test/CodeGenSYCL/kernel-annotation.cpp
Lines changed: 48 additions & 0 deletions
diff --git a/‎clang/test/Driver/sycl-local-accessor-opt.cpp
Lines changed: 0 additions & 14 deletions b/‎clang/test/Driver/sycl-local-accessor-opt.cpp
Lines changed: 0 additions & 14 deletions
diff --git a/‎libclc/amdgcn-amdhsa/libspirv/SOURCES
Lines changed: 1 addition & 0 deletions b/‎libclc/amdgcn-amdhsa/libspirv/SOURCES
Lines changed: 1 addition & 0 deletions
diff --git a/‎libclc/amdgcn-amdhsa/libspirv/workitem/get_global_offset.ll
Lines changed: 42 additions & 0 deletions b/‎libclc/amdgcn-amdhsa/libspirv/workitem/get_global_offset.ll
Lines changed: 42 additions & 0 deletions
diff --git a/‎libclc/amdgcn/libspirv/SOURCES
Lines changed: 0 additions & 1 deletion b/‎libclc/amdgcn/libspirv/SOURCES
Lines changed: 0 additions & 1 deletion
diff --git a/‎libclc/amdgcn/libspirv/workitem/get_global_offset.cl
Lines changed: 0 additions & 25 deletions b/‎libclc/amdgcn/libspirv/workitem/get_global_offset.cl
Lines changed: 0 additions & 25 deletions
diff --git a/‎llvm/docs/AMDGPUUsage.rst
Lines changed: 27 additions & 0 deletions b/‎llvm/docs/AMDGPUUsage.rst
Lines changed: 27 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Lines changed: 4 additions & 0 deletions b/‎llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/lib/Target/NVPTX/SYCL/GlobalOffset.h renamed to ‎llvm/include/llvm/SYCLLowerIR/GlobalOffset.h b/‎llvm/lib/Target/NVPTX/SYCL/GlobalOffset.h renamed to ‎llvm/include/llvm/SYCLLowerIR/GlobalOffset.h
diff --git a/‎llvm/include/llvm/SYCLLowerIR/TargetHelpers.h
Lines changed: 43 additions & 0 deletions b/‎llvm/include/llvm/SYCLLowerIR/TargetHelpers.h
Lines changed: 43 additions & 0 deletions
diff --git a/‎llvm/lib/SYCLLowerIR/CMakeLists.txt
Lines changed: 2 additions & 0 deletions b/‎llvm/lib/SYCLLowerIR/CMakeLists.txt
Lines changed: 2 additions & 0 deletions
@@ -316,5 +316,10 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_bf16, "V16fV4sV8sV16fiIiIi",
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts")
 TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", "nc", "mai-insts")
 
+//===----------------------------------------------------------------------===//
+// SYCL builtin.
+//===----------------------------------------------------------------------===//
+BUILTIN(__builtin_amdgcn_implicit_offset, "Ui*5", "nc")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
@@ -151,6 +151,26 @@ static bool occupiesMoreThan(CodeGenTypes &cgt,
   return (intCount + fpCount > maxAllRegisters);
 }
 
+/// Helper function for AMDGCN and NVVM targets, adds a NamedMDNode with GV,
+/// Name, and Operand as operands, and adds the resulting MDNode to the
+/// AnnotationName MDNode.
+static void addAMDGCOrNVVMMetadata(const char *AnnotationName,
+                                   llvm::GlobalValue *GV, StringRef Name,
+                                   int Operand) {
+  llvm::Module *M = GV->getParent();
+  llvm::LLVMContext &Ctx = M->getContext();
+
+  // Get annotations metadata node.
+  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(AnnotationName);
+
+  llvm::Metadata *MDVals[] = {
+      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
+      llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+  // Append metadata to annotations node.
+  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
+
 bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
                                              llvm::Type *eltTy,
                                              unsigned numElts) const {
@@ -7327,18 +7347,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                              StringRef Name, int Operand) {
-  llvm::Module *M = GV->getParent();
-  llvm::LLVMContext &Ctx = M->getContext();
-
-  // Get "nvvm.annotations" metadata node
-  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
-
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
-  // Append metadata to nvvm.annotations
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+  addAMDGCOrNVVMMetadata("nvvm.annotations", GV, Name, Operand);
 }
 
 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
@@ -9308,6 +9317,12 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
                             llvm::Type *BlockTy) const override;
   bool shouldEmitStaticExternCAliases() const override;
   void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+
+private:
+  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
+  // resulting MDNode to the amdgcn.annotations MDNode.
+  static void addAMDGCNMetadata(llvm::GlobalValue *GV, StringRef Name,
+                                int Operand);
 };
 }
 
@@ -9324,6 +9339,11 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
            cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
 }
 
+void AMDGPUTargetCodeGenInfo::addAMDGCNMetadata(llvm::GlobalValue *GV,
+                                                StringRef Name, int Operand) {
+  addAMDGCOrNVVMMetadata("amdgcn.annotations", GV, Name, Operand);
+}
+
 void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
     const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
   const auto *ReqdWGS =
@@ -9425,10 +9445,15 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
 
   const bool IsHIPKernel =
       M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
-
   if (IsHIPKernel)
     F->addFnAttr("uniform-work-group-size", "true");
 
+  // Create !{<func-ref>, metadata !"kernel", i32 1} node for SYCL kernels.
+  const bool IsSYCLKernel =
+      FD && M.getLangOpts().SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>();
+  if (IsSYCLKernel)
+    addAMDGCNMetadata(F, "kernel", 1);
+
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
 
 
@@ -5842,12 +5842,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
   }
 
-  // Enable local accessor to shared memory pass for SYCL.
-  if (isa<BackendJobAction>(JA) && IsSYCLOffloadDevice &&
-      (Triple.isNVPTX() || Triple.isAMDGCN())) {
-    CmdArgs.push_back("-mllvm");
-    CmdArgs.push_back("-sycl-enable-local-accessor");
-  }
   // These two are potentially updated by AddClangCLArgs.
   codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
   bool EmitCodeView = false;
 
@@ -78,12 +78,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
                                          const llvm::opt::ArgList &Args) const {
   // Construct lld command.
   // The output from ld.lld is an HSA code object file.
-  ArgStringList LldArgs{"-flavor",
-                        "gnu",
-                        "--no-undefined",
-                        "-shared",
-                        "-plugin-opt=-amdgpu-internalize-symbols",
-                        "-plugin-opt=-sycl-enable-local-accessor"};
+  ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
+                        "-plugin-opt=-amdgpu-internalize-symbols"};
 
   auto &TC = getToolChain();
   auto &D = TC.getDriver();
 
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -triple spir64-unknown-unknown -fsycl-is-device \
+// RUN: -S -emit-llvm %s -o %temp.ll
+// RUN: FileCheck -check-prefix=CHECK-SPIR --input-file %temp.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fsycl-is-device \
+// RUN: -S -emit-llvm %s -o %temp.ll
+// RUN: FileCheck -check-prefix=CHECK-NVPTX --input-file %temp.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -fsycl-is-device \
+// RUN: -S -emit-llvm %s -o %temp.ll
+// RUN: FileCheck -check-prefix=CHECK-AMDGCN --input-file %temp.ll %s
+
+// The test makes sure that `[nnvm|amdgcn].annotations are correctly generated
+// only for their respective targets.
+
+#include "Inputs/sycl.hpp"
+
+sycl::handler H;
+
+class Functor {
+public:
+  void operator()() const {}
+};
+
+// CHECK-SPIR-NOT: annotations =
+
+// CHECK-NVPTX: nvvm.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
+// CHECK-NVPTX: [[FIRST]] = !{void ()* @_ZTS7Functor, !"kernel", i32 1}
+// CHECK-NVPTX: [[SECOND]] = !{void ()* @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
+
+// CHECK-AMDGCN: amdgcn.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
+// CHECK-AMDGCN: [[FIRST]] = !{void ()* @_ZTS7Functor, !"kernel", i32 1}
+// CHECK-AMDGCN: [[SECOND]] = !{void ()* @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
+
+int main() {
+  sycl::queue q;
+  q.submit([&](sycl::handler &cgh) {
+    Functor foo{};
+    cgh.single_task(foo);
+  });
+
+  q.submit([&](cl::sycl::handler &cgh) {
+    cgh.parallel_for<class foo_2>(cl::sycl::range<1>(1),
+                                  [=](cl::sycl::item<1> item) {
+                                  });
+  });
+  return 0;
+}
@@ -1,4 +1,5 @@
 
+workitem/get_global_offset.ll
 group/collectives.cl
 group/collectives_helpers.ll
 atomic/loadstore_helpers.ll
 
@@ -0,0 +1,42 @@
+;;===----------------------------------------------------------------------===//
+;;
+;; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+;; See https://llvm.org/LICENSE.txt for license information.
+;; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;;
+;;===----------------------------------------------------------------------===//
+
+#if __clang_major__ >= 7
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+#else
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+#endif
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+
+define hidden i64 @_Z22__spirv_GlobalOffset_xv() nounwind alwaysinline {
+entry:
+  %0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+  %1 = load i32, i32 addrspace(5)* %0, align 4
+  %zext = zext i32 %1 to i64
+  ret i64 %zext
+}
+
+define hidden i64 @_Z22__spirv_GlobalOffset_yv() nounwind alwaysinline {
+entry:
+  %0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 1
+  %1 = load i32, i32 addrspace(5)* %arrayidx, align 4
+  %zext = zext i32 %1 to i64
+  ret i64 %zext
+}
+
+define hidden i64 @_Z22__spirv_GlobalOffset_zv() nounwind alwaysinline {
+entry:
+  %0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
+  %1 = load i32, i32 addrspace(5)* %arrayidx, align 4
+  %zext = zext i32 %1 to i64
+  ret i64 %zext
+}
@@ -1,4 +1,3 @@
-workitem/get_global_offset.cl
 workitem/get_group_id.cl
 workitem/get_global_size.cl
 workitem/get_local_id.cl
 
@@ -15001,6 +15001,33 @@ track the usage for each kernel. However, in some cases careful organization of
 the kernels and functions in the source file means there is minimal additional
 effort required to accurately calculate GPR usage.
 
+SYCL Kernel Metadata
+====================
+
+This section describes the additional metadata that is inserted for SYCL
+kernels. As SYCL is a single source programming model functions can either
+execute on a host or a device (i.e. GPU). Device kernels are akin to kernel
+entry-points in GPU program. To mark an LLVM IR function as a device kernel
+function, we make use of special LLVM metadata. The AMDGCN back-end will look
+for a named metadata node called ``amdgcn.annotations``. This named metadata
+must contain a list of metadata that describe the kernel IR. For our purposes,
+we need to declare a metadata node that assigns the `"kernel"` attribute to the
+LLVM IR function that should be emitted as a SYCL kernel function. These
+metadata nodes take the form:
+
+.. code-block:: text
+
+  !{<function ref>, metadata !"kernel", i32 1}
+
+Consider the metadata generated by global-offset pass, showing a void kernel
+function `example_kernel_with_offset` taking one argument, a pointer to 3 i32
+integers:
+
+.. code-block:: llvm
+
+  !amdgcn.annotations = !{!0}
+  !0 = !{void ([3 x i32]*)* @_ZTS14example_kernel_with_offset, !"kernel", i32 1}
+
 Additional Documentation
 ========================
 
 
@@ -2061,4 +2061,8 @@ def int_amdgcn_reloc_constant : Intrinsic<
   [llvm_i32_ty], [llvm_metadata_ty],
   [IntrNoMem, IntrSpeculatable, IntrWillReturn]
 >;
+
+// SYCL
+def int_amdgcn_implicit_offset : GCCBuiltin<"__builtin_amdgcn_implicit_offset">,
+  Intrinsic<[LLVMQualPointerType<llvm_i32_ty, 5>], [], [IntrNoMem, IntrSpeculatable]>;
 }
@@ -0,0 +1,43 @@
+//===------------ TargetHelpers.h - Helpers for SYCL kernels ------------- ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Helper functions for processing SYCL kernels.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYCL_SYCL_LOWER_IR_TARGET_HELPERS_H
+#define LLVM_SYCL_SYCL_LOWER_IR_TARGET_HELPERS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+namespace llvm {
+namespace TargetHelpers {
+
+enum class ArchType { Cuda, AMDHSA, Unsupported };
+
+struct KernelPayload {
+  KernelPayload(Function *Kernel, MDNode *MD = nullptr);
+  Function *Kernel;
+  MDNode *MD;
+};
+
+ArchType getArchType(const Module &M);
+
+std::string getAnnotationString(ArchType AT);
+
+void populateKernels(Module &M, SmallVectorImpl<KernelPayload> &Kernels,
+                     TargetHelpers::ArchType AT);
+
+} // end namespace TargetHelpers
+} // end namespace llvm
+
+#endif
@@ -59,6 +59,8 @@ add_llvm_component_library(LLVMSYCLLowerIR
   MutatePrintfAddrspace.cpp
 
   LocalAccessorToSharedMemory.cpp
+  GlobalOffset.cpp
+  TargetHelpers.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`
	`2`	`+workitem/get_global_offset.ll`
`2`	`3`	`group/collectives.cl`
`3`	`4`	`group/collectives_helpers.ll`
`4`	`5`	`atomic/loadstore_helpers.ll`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-workitem/get_global_offset.cl`
`2`	`1`	`workitem/get_group_id.cl`
`3`	`2`	`workitem/get_global_size.cl`
`4`	`3`	`workitem/get_local_id.cl`
Original file line number	Diff line number	Diff line change
`@@ -2061,4 +2061,8 @@ def int_amdgcn_reloc_constant : Intrinsic<`
`2061`	`2061`	`[llvm_i32_ty], [llvm_metadata_ty],`
`2062`	`2062`	`[IntrNoMem, IntrSpeculatable, IntrWillReturn]`
`2063`	`2063`	`>;`
	`2064`	`+`
	`2065`	`+// SYCL`
	`2066`	`+def int_amdgcn_implicit_offset : GCCBuiltin<"__builtin_amdgcn_implicit_offset">,`
	`2067`	`+ Intrinsic<[LLVMQualPointerType<llvm_i32_ty, 5>], [], [IntrNoMem, IntrSpeculatable]>;`
`2064`	`2068`	`}`