DoyleLi
diff --git a/‎clang/include/clang/Basic/AddressSpaces.h
+34 b/‎clang/include/clang/Basic/AddressSpaces.h
+34
diff --git a/‎clang/include/clang/Basic/DiagnosticDriverKinds.td
+2 b/‎clang/include/clang/Basic/DiagnosticDriverKinds.td
+2
diff --git a/‎clang/lib/AST/MicrosoftMangle.cpp
+15 b/‎clang/lib/AST/MicrosoftMangle.cpp
+15
diff --git a/‎clang/lib/CodeGen/CGBuiltin.cpp
+6-10 b/‎clang/lib/CodeGen/CGBuiltin.cpp
+6-10
diff --git a/‎clang/lib/Driver/Driver.cpp
+81-21 b/‎clang/lib/Driver/Driver.cpp
+81-21
diff --git a/‎clang/lib/Driver/ToolChain.cpp
+3-2 b/‎clang/lib/Driver/ToolChain.cpp
+3-2
diff --git a/‎clang/lib/Sema/SemaSYCL.cpp
+9-2 b/‎clang/lib/Sema/SemaSYCL.cpp
+9-2
diff --git a/‎clang/test/CodeGenOpenCL/spirv-builtins-addr-space.cl
+18 b/‎clang/test/CodeGenOpenCL/spirv-builtins-addr-space.cl
+18
diff --git a/‎clang/test/CodeGenSYCL/Inputs/sycl.hpp
+14-7 b/‎clang/test/CodeGenSYCL/Inputs/sycl.hpp
+14-7
@@ -87,6 +87,40 @@ inline bool isPtrSizeAddressSpace(LangAS AS) {
           AS == LangAS::ptr64);
 }
 
+/// Map an OpenCL address space onto its SYCL counterpart. An address space
+/// that is not one of the OpenCL global, global_device, global_host, local
+/// or private spaces is returned unchanged.
+inline LangAS asSYCLLangAS(LangAS AS) {
+  if (AS == LangAS::opencl_global)
+    return LangAS::sycl_global;
+  if (AS == LangAS::opencl_global_device)
+    return LangAS::sycl_global_device;
+  if (AS == LangAS::opencl_global_host)
+    return LangAS::sycl_global_host;
+  if (AS == LangAS::opencl_local)
+    return LangAS::sycl_local;
+  if (AS == LangAS::opencl_private)
+    return LangAS::sycl_private;
+  return AS;
+}
+
+/// Map a SYCL address space onto its OpenCL counterpart. An address space
+/// that is not one of the SYCL global, global_device, global_host, local
+/// or private spaces is returned unchanged.
+inline LangAS asOpenCLLangAS(LangAS AS) {
+  if (AS == LangAS::sycl_global)
+    return LangAS::opencl_global;
+  if (AS == LangAS::sycl_global_device)
+    return LangAS::opencl_global_device;
+  if (AS == LangAS::sycl_global_host)
+    return LangAS::opencl_global_host;
+  if (AS == LangAS::sycl_local)
+    return LangAS::opencl_local;
+  if (AS == LangAS::sycl_private)
+    return LangAS::opencl_private;
+  return AS;
+}
+
 } // namespace clang
 
 #endif // LLVM_CLANG_BASIC_ADDRESSSPACES_H
@@ -303,6 +303,8 @@ def err_drv_expecting_fsycl_with_sycl_opt : Error<
   "'%0' must be used in conjunction with '-fsycl' to enable offloading">;
 def err_drv_fsycl_with_c_type : Error<
   "'%0' must not be used in conjunction with '-fsycl', which expects C++ source">;
+def err_drv_sycl_missing_amdgpu_arch : Error<
+  "missing AMDGPU architecture for SYCL offloading; specify it with '-Xsycl-target-backend --offload-arch'">;
 def warn_drv_sycl_offload_target_duplicate : Warning<
   "SYCL offloading target '%0' is similar to target '%1' already specified; "
   "will be ignored">, InGroup<SyclTarget>;
 
@@ -2184,6 +2184,21 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
     case LangAS::cuda_device:
       Extra.mangleSourceName("_ASCUdevice");
       break;
+    case LangAS::sycl_global:
+      Extra.mangleSourceName("_ASSYglobal");
+      break;
+    case LangAS::sycl_global_device:
+      Extra.mangleSourceName("_ASSYdevice");
+      break;
+    case LangAS::sycl_global_host:
+      Extra.mangleSourceName("_ASSYhost");
+      break;
+    case LangAS::sycl_local:
+      Extra.mangleSourceName("_ASSYlocal");
+      break;
+    case LangAS::sycl_private:
+      Extra.mangleSourceName("_ASSYprivate");
+      break;
     case LangAS::cuda_constant:
       Extra.mangleSourceName("_ASCUconstant");
       break;
 
@@ -18344,22 +18344,18 @@ RValue CodeGenFunction::EmitIntelFPGARegBuiltin(const CallExpr *E,
                                                 ReturnValueSlot ReturnValue) {
   const Expr *PtrArg = E->getArg(0);
   QualType ArgType = PtrArg->getType();
-  llvm::Value *V = nullptr;
   StringRef AnnotStr = "__builtin_intel_fpga_reg";
 
-  if (ArgType->isStructureOrClassType() || ArgType->isUnionType()) {
-    RValue RV = EmitAnyExpr(PtrArg);
-    Address A = EmitIntelFPGAFieldAnnotations(E->getExprLoc(),
-                                              RV.getAggregateAddress(),
-                                              AnnotStr);
-    llvm::Type *VTy = ReturnValue.getValue().getPointer()->getType();
-    uint64_t SizeVal = CGM.getDataLayout().getTypeAllocSize(VTy);
-    Builder.CreateMemCpy(ReturnValue.getValue(), A, SizeVal, false);
+  if (ArgType->isRecordType()) {
+    Address DstAddr = ReturnValue.getValue();
+    EmitAnyExprToMem(PtrArg, DstAddr, ArgType.getQualifiers(), true);
+    Address A =
+        EmitIntelFPGAFieldAnnotations(E->getExprLoc(), DstAddr, AnnotStr);
     return RValue::getAggregate(A);
   }
 
   // if scalar type
-  V = EmitScalarExpr(PtrArg);
+  llvm::Value *V = EmitScalarExpr(PtrArg);
 
   // llvm.annotation does not accept anything but integer types.
   llvm::Type *OrigVType = V->getType();
 
@@ -3945,8 +3945,9 @@ class OffloadingActionBuilder final {
     /// List of static archives to extract FPGA dependency info from
     ActionList FPGAArchiveInputs;
 
-    /// List of CUDA architectures to use in this compilation with NVPTX targets.
-    SmallVector<CudaArch, 8> GpuArchList;
+    /// List of GPU architectures to use in this compilation with NVPTX/AMDGCN
+    /// targets.
+    SmallVector<std::pair<llvm::Triple, std::string>, 8> GpuArchList;
 
     /// Build the last steps for CUDA after all BC files have been linked.
     JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) {
@@ -3983,13 +3984,17 @@ class OffloadingActionBuilder final {
                       const Driver::InputList &Inputs)
         : DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {}
 
-    void withBoundArchForToolChain(const ToolChain* TC,
+    void withBoundArchForToolChain(const ToolChain *TC,
                                    llvm::function_ref<void(const char *)> Op) {
-      if (TC->getTriple().isNVPTX())
-        for (CudaArch A : GpuArchList)
-          Op(CudaArchToString(A));
-      else
-        Op(nullptr);
+      // Op runs exactly once: with the arch of the first GpuArchList entry
+      // matching this toolchain's triple, or with nullptr when none matches.
+      const llvm::Triple &TCTriple = TC->getTriple();
+      auto BoundIt = llvm::find_if(
+          GpuArchList, [&](const auto &P) { return TCTriple == P.first; });
+      const char *BoundArch = nullptr;
+      if (BoundIt != GpuArchList.end())
+        BoundArch = Args.MakeArgString(BoundIt->second.c_str());
+      Op(BoundArch);
     }
 
     ActionBuilderReturnCode
@@ -4043,8 +4048,8 @@ class OffloadingActionBuilder final {
         }
         const auto *TC = ToolChains.front();
         const char *BoundArch = nullptr;
-        if (TC->getTriple().isNVPTX())
-          BoundArch = CudaArchToString(GpuArchList.front());
+        if (TC->getTriple().isNVPTX() || TC->getTriple().isAMDGCN())
+          BoundArch = GpuArchList.front().second.c_str();
         DA.add(*DeviceCompilerInput, *TC, BoundArch, Action::OFK_SYCL);
         // Clear the input file, it is already a dependence to a host
         // action.
@@ -4627,39 +4632,94 @@ class OffloadingActionBuilder final {
       }
     }
 
-    /// Initialize the GPU architecture list from arguments - this populates `GpuArchList` from
-    /// `--cuda-gpu-arch` flags. Only relevant if compiling to CUDA. Return true if any
-    /// initialization errors are found.
+    /// Initialize the GPU architecture list from arguments - this populates
+    /// `GpuArchList` from `--offload-arch` flags. Only relevant if compiling to
+    /// CUDA or AMDGCN. Return true if any initialization errors are found.
+    /// FIXME: "offload-arch" and the BoundArch mechanism should also be
+    /// used in the SYCLToolChain for SPIR-V AOT to track the offload
+    /// architecture instead of the Triple sub-arch it currently uses.
     bool initializeGpuArchMap() {
       const OptTable &Opts = C.getDriver().getOpts();
       for (auto *A : Args) {
         unsigned Index;
+        llvm::Triple *TargetBE = nullptr;
 
-        if (A->getOption().matches(options::OPT_Xsycl_backend_EQ))
+        auto GetTripleIt = [&, this](llvm::StringRef Triple) {
+          llvm::Triple TargetTriple{Triple};
+          auto TripleIt = llvm::find_if(SYCLTripleList, [&](auto &SYCLTriple) {
+            return SYCLTriple == TargetTriple;
+          });
+          return TripleIt != SYCLTripleList.end() ? &*TripleIt : nullptr;
+        };
+
+        if (A->getOption().matches(options::OPT_Xsycl_backend_EQ)) {
+          TargetBE = GetTripleIt(A->getValue(0));
           // Passing device args: -Xsycl-target-backend=<triple> -opt=val.
-          if (llvm::Triple(A->getValue(0)).isNVPTX())
+          if (TargetBE)
             Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
           else
             continue;
-        else if (A->getOption().matches(options::OPT_Xsycl_backend))
+        } else if (A->getOption().matches(options::OPT_Xsycl_backend)) {
+          if (SYCLTripleList.size() > 1) {
+            C.getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple)
+                << A->getSpelling();
+            continue;
+          }
           // Passing device args: -Xsycl-target-backend -opt=val.
+          TargetBE = &SYCLTripleList.front();
           Index = Args.getBaseArgs().MakeIndex(A->getValue(0));
-        else
+        } else
           continue;
 
         A->claim();
         auto ParsedArg = Opts.ParseOneArg(Args, Index);
+
         // TODO: Support --no-cuda-gpu-arch, --{,no-}cuda-gpu-arch=all.
         if (ParsedArg &&
             ParsedArg->getOption().matches(options::OPT_offload_arch_EQ)) {
+          llvm::StringRef ArchStr = ParsedArg->getValue(0);
+          if (TargetBE->isNVPTX()) {
+            // CudaArch also covers AMDGCN archs, so reject non-NVIDIA ones.
+            CudaArch Arch = StringToCudaArch(ArchStr);
+            if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+              C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch)
+                  << ArchStr;
+              continue;
+            }
+            ArchStr = CudaArchToString(Arch);
+          } else if (TargetBE->isAMDGCN()) {
+            llvm::StringMap<bool> Features;
+            auto Arch =
+                parseTargetID(getHIPOffloadTargetTriple(), ArchStr, &Features);
+            if (!Arch) {
+              C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
+              continue;
+            }
+            auto CanId = getCanonicalTargetID(Arch.getValue(), Features);
+            ArchStr = Args.MakeArgStringRef(CanId);
+          }
           ParsedArg->claim();
-          GpuArchList.push_back(StringToCudaArch(ParsedArg->getValue(0)));
+          GpuArchList.emplace_back(*TargetBE, ArchStr);
         }
       }
 
-      // If there are no CUDA architectures provided then default to SM_50.
-      if (GpuArchList.empty()) {
-        GpuArchList.push_back(CudaArch::SM_50);
+      // Handle default architectures.
+      for (auto &Triple : SYCLTripleList) {
+        // For NVIDIA use SM_50 as a default
+        if (Triple.isNVPTX() && llvm::none_of(GpuArchList, [&](auto &P) {
+              return P.first.isNVPTX();
+            })) {
+          llvm::StringRef DefaultArch = CudaArchToString(CudaArch::SM_50);
+          GpuArchList.emplace_back(Triple, DefaultArch);
+        }
+
+        // For AMD require the architecture to be set by the user
+        if (Triple.isAMDGCN() && llvm::none_of(GpuArchList, [&](auto &P) {
+              return P.first.isAMDGCN();
+            })) {
+          C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch);
+          return true;
+        }
       }
 
       return false;
 
@@ -1174,8 +1174,9 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOffloadTargetArgs(
     // at all, target and host share a toolchain.
     if (A->getOption().matches(options::OPT_m_Group)) {
       // AMD GPU is a special case, as -mcpu is required for the device
-      // compilation.
-      if (SameTripleAsHost || getTriple().getArch() == llvm::Triple::amdgcn)
+      // compilation, except for SYCL which uses --offload-arch.
+      if (SameTripleAsHost || (getTriple().getArch() == llvm::Triple::amdgcn &&
+                               DeviceOffloadKind != Action::OFK_SYCL))
         DAL->append(A);
       else
         Modified = true;
 
@@ -5302,10 +5302,17 @@ bool Util::isSyclFunction(const FunctionDecl *FD, StringRef Name) {
   if (DC->isTranslationUnit())
     return false;
 
-  std::array<DeclContextDesc, 2> Scopes = {
+  std::array<DeclContextDesc, 2> ScopesSycl = {
       Util::MakeDeclContextDesc(Decl::Kind::Namespace, "cl"),
       Util::MakeDeclContextDesc(Decl::Kind::Namespace, "sycl")};
-  return matchContext(DC, Scopes);
+  std::array<DeclContextDesc, 5> ScopesOneapiExp = {
+      Util::MakeDeclContextDesc(Decl::Kind::Namespace, "cl"),
+      Util::MakeDeclContextDesc(Decl::Kind::Namespace, "sycl"),
+      Util::MakeDeclContextDesc(Decl::Kind::Namespace, "ext"),
+      Util::MakeDeclContextDesc(Decl::Kind::Namespace, "oneapi"),
+      Util::MakeDeclContextDesc(Decl::Kind::Namespace, "experimental")};
+
+  return matchContext(DC, ScopesSycl) || matchContext(DC, ScopesOneapiExp);
 }
 
 bool Util::isAccessorPropertyListType(QualType Ty) {
 
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 %s -x cl -fdeclare-spirv-builtins -fsyntax-only -emit-llvm -o - -O0 | FileCheck %s
+//
+// Check that SPIR-V builtins are declared with OpenCL address spaces rather
+// than SYCL address spaces when using them with OpenCL. OpenCL address spaces
+// are mangled with the CL prefix and SYCL address spaces are mangled with the
+// SY prefix.
+
+// CHECK: __spirv_ocl_modf{{.*}}CLglobal
+void modf_global(float a, global float *ptr) { __spirv_ocl_modf(a, ptr); }
+
+// CHECK: __spirv_ocl_modf{{.*}}CLlocal
+void modf_local(float a, local float *ptr) { __spirv_ocl_modf(a, ptr); }
+
+// CHECK: __spirv_ocl_modf{{.*}}CLprivate
+void modf_private(float a) {
+  float *ptr; // no address-space qualifier; expected to mangle as private
+  __spirv_ocl_modf(a, ptr);
+}
@@ -120,13 +120,6 @@ struct no_alias {
 } // namespace oneapi
 } // namespace ext
 
-namespace ext {
-namespace oneapi {
-template <typename... properties>
-class accessor_property_list {};
-} // namespace oneapi
-} // namespace ext
-
 template <int dim>
 struct id {
   template <typename... T>
@@ -146,6 +139,20 @@ template <int dim> struct item {
   int Data;
 };
 
+namespace ext {
+namespace oneapi {
+template <typename... properties>
+class accessor_property_list {}; // empty stand-in accepting any property pack
+namespace experimental {
+template <int Dims> item<Dims>
+this_item() { return item<Dims>{}; } // stub: hands back an empty-brace item
+
+template <int Dims> id<Dims>
+this_id() { return id<Dims>{}; } // stub: hands back an empty-brace id
+} // namespace experimental
+} // namespace oneapi
+} // namespace ext
+
 template <int Dims> item<Dims>
 this_item() { return item<Dims>{}; }