@@ -3945,8 +3945,9 @@ class OffloadingActionBuilder final {
3945
3945
// / List of static archives to extract FPGA dependency info from
3946
3946
ActionList FPGAArchiveInputs;
3947
3947
3948
- // / List of CUDA architectures to use in this compilation with NVPTX targets.
3949
- SmallVector<CudaArch, 8 > GpuArchList;
3948
+ // / List of GPU architectures to use in this compilation with NVPTX/AMDGCN
3949
+ // / targets.
3950
+ SmallVector<std::pair<llvm::Triple, std::string>, 8 > GpuArchList;
3950
3951
3951
3952
// / Build the last steps for CUDA after all BC files have been linked.
3952
3953
JobAction *finalizeNVPTXDependences (Action *Input, const llvm::Triple &TT) {
@@ -3983,13 +3984,17 @@ class OffloadingActionBuilder final {
3983
3984
const Driver::InputList &Inputs)
3984
3985
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {}
3985
3986
3986
- void withBoundArchForToolChain (const ToolChain* TC,
3987
+ void withBoundArchForToolChain (const ToolChain * TC,
3987
3988
llvm::function_ref<void (const char *)> Op) {
3988
- if (TC->getTriple ().isNVPTX ())
3989
- for (CudaArch A : GpuArchList)
3990
- Op (CudaArchToString (A));
3991
- else
3992
- Op (nullptr );
3989
+ for (auto &A : GpuArchList) {
3990
+ if (TC->getTriple () == A.first ) {
3991
+ Op (Args.MakeArgString (A.second .c_str ()));
3992
+ return ;
3993
+ }
3994
+ }
3995
+
3996
+ // no bound arch for this toolchain
3997
+ Op (nullptr );
3993
3998
}
3994
3999
3995
4000
ActionBuilderReturnCode
@@ -4043,8 +4048,8 @@ class OffloadingActionBuilder final {
4043
4048
}
4044
4049
const auto *TC = ToolChains.front ();
4045
4050
const char *BoundArch = nullptr ;
4046
- if (TC->getTriple ().isNVPTX ())
4047
- BoundArch = CudaArchToString ( GpuArchList.front ());
4051
+ if (TC->getTriple ().isNVPTX () || TC-> getTriple (). isAMDGCN () )
4052
+ BoundArch = GpuArchList.front (). second . c_str ( );
4048
4053
DA.add (*DeviceCompilerInput, *TC, BoundArch, Action::OFK_SYCL);
4049
4054
// Clear the input file, it is already a dependence to a host
4050
4055
// action.
@@ -4627,39 +4632,94 @@ class OffloadingActionBuilder final {
4627
4632
}
4628
4633
}
4629
4634
4630
- // / Initialize the GPU architecture list from arguments - this populates `GpuArchList` from
4631
- // / `--cuda-gpu-arch` flags. Only relevant if compiling to CUDA. Return true if any
4632
- // / initialization errors are found.
4635
+ // / Initialize the GPU architecture list from arguments - this populates
4636
+ // / `GpuArchList` from `--offload-arch` flags passed via `-Xsycl-target-backend`.
4637
+ // / Only relevant if compiling to CUDA or AMDGCN. Return true if any initialization errors are found.
4638
+ // / FIXME: "offload-arch" and the BoundArch mechanism should also be
4639
+ // used in the SYCLToolChain for SPIR-V AOT to track the offload
4640
+ // architecture instead of the Triple sub-arch it currently uses.
4633
4641
bool initializeGpuArchMap () {
4634
4642
const OptTable &Opts = C.getDriver ().getOpts ();
4635
4643
for (auto *A : Args) {
4636
4644
unsigned Index;
4645
+ llvm::Triple *TargetBE = nullptr ;
4637
4646
4638
- if (A->getOption ().matches (options::OPT_Xsycl_backend_EQ))
4647
+ auto GetTripleIt = [&, this ](llvm::StringRef Triple) {
4648
+ llvm::Triple TargetTriple{Triple};
4649
+ auto TripleIt = llvm::find_if (SYCLTripleList, [&](auto &SYCLTriple) {
4650
+ return SYCLTriple == TargetTriple;
4651
+ });
4652
+ return TripleIt != SYCLTripleList.end () ? &*TripleIt : nullptr ;
4653
+ };
4654
+
4655
+ if (A->getOption ().matches (options::OPT_Xsycl_backend_EQ)) {
4656
+ TargetBE = GetTripleIt (A->getValue (0 ));
4639
4657
// Passing device args: -Xsycl-target-backend=<triple> -opt=val.
4640
- if (llvm::Triple (A-> getValue ( 0 )). isNVPTX () )
4658
+ if (TargetBE )
4641
4659
Index = Args.getBaseArgs ().MakeIndex (A->getValue (1 ));
4642
4660
else
4643
4661
continue ;
4644
- else if (A->getOption ().matches (options::OPT_Xsycl_backend))
4662
+ } else if (A->getOption ().matches (options::OPT_Xsycl_backend)) {
4663
+ if (SYCLTripleList.size () > 1 ) {
4664
+ C.getDriver ().Diag (diag::err_drv_Xsycl_target_missing_triple)
4665
+ << A->getSpelling ();
4666
+ continue ;
4667
+ }
4645
4668
// Passing device args: -Xsycl-target-backend -opt=val.
4669
+ TargetBE = &SYCLTripleList.front ();
4646
4670
Index = Args.getBaseArgs ().MakeIndex (A->getValue (0 ));
4647
- else
4671
+ } else
4648
4672
continue ;
4649
4673
4650
4674
A->claim ();
4651
4675
auto ParsedArg = Opts.ParseOneArg (Args, Index);
4676
+
4652
4677
// TODO: Support --no-cuda-gpu-arch, --{,no-}cuda-gpu-arch=all.
4653
4678
if (ParsedArg &&
4654
4679
ParsedArg->getOption ().matches (options::OPT_offload_arch_EQ)) {
4680
+ llvm::StringRef ArchStr = ParsedArg->getValue (0 );
4681
+ if (TargetBE->isNVPTX ()) {
4682
+ // CudaArch also covers AMDGCN archs, so only accept NVIDIA GPU archs here.
4683
+ CudaArch Arch = StringToCudaArch (ArchStr);
4684
+ if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch (Arch)) {
4685
+ C.getDriver ().Diag (clang::diag::err_drv_cuda_bad_gpu_arch)
4686
+ << ArchStr;
4687
+ continue ;
4688
+ }
4689
+ ArchStr = CudaArchToString (Arch);
4690
+ } else if (TargetBE->isAMDGCN ()) {
4691
+ llvm::StringMap<bool > Features;
4692
+ auto Arch =
4693
+ parseTargetID (getHIPOffloadTargetTriple (), ArchStr, &Features);
4694
+ if (!Arch) {
4695
+ C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4696
+ continue ;
4697
+ }
4698
+ auto CanId = getCanonicalTargetID (Arch.getValue (), Features);
4699
+ ArchStr = Args.MakeArgStringRef (CanId);
4700
+ }
4655
4701
ParsedArg->claim ();
4656
- GpuArchList.push_back ( StringToCudaArch (ParsedArg-> getValue ( 0 )) );
4702
+ GpuArchList.emplace_back (*TargetBE, ArchStr );
4657
4703
}
4658
4704
}
4659
4705
4660
- // If there are no CUDA architectures provided then default to SM_50.
4661
- if (GpuArchList.empty ()) {
4662
- GpuArchList.push_back (CudaArch::SM_50);
4706
+ // Handle default architectures
4707
+ for (auto &Triple : SYCLTripleList) {
4708
+ // For NVIDIA use SM_50 as a default
4709
+ if (Triple.isNVPTX () && llvm::none_of (GpuArchList, [&](auto &P) {
4710
+ return P.first .isNVPTX ();
4711
+ })) {
4712
+ llvm::StringRef DefaultArch = CudaArchToString (CudaArch::SM_50);
4713
+ GpuArchList.emplace_back (Triple, DefaultArch);
4714
+ }
4715
+
4716
+ // For AMD require the architecture to be set by the user
4717
+ if (Triple.isAMDGCN () && llvm::none_of (GpuArchList, [&](auto &P) {
4718
+ return P.first .isAMDGCN ();
4719
+ })) {
4720
+ C.getDriver ().Diag (clang::diag::err_drv_sycl_missing_amdgpu_arch);
4721
+ return true ;
4722
+ }
4663
4723
}
4664
4724
4665
4725
return false ;
0 commit comments