Skip to content

Commit 455cedc

Browse files
authored
[Clang] Make -Xarch_ handling generic for all toolchains (#125421)
Summary: Currently, `-Xarch_` is handled specially by individual toolchains (e.g. Mach-O). This patch unifies the handling so that it can be used generically. The main benefit here is that we now have a more generic version of `-Xopenmp-target=`, which should probably just be deprecated. Additionally, it allows us to pass arguments specifically to different architectures for offloading. This patch is done in preparation for making the selection of offloading toolchains more generic; this will be helpful as people move toward compile jobs that include multiple toolchains (SPIR-V, AMDGCN, NVPTX).
1 parent cd754af commit 455cedc

File tree

6 files changed

+82
-39
lines changed

6 files changed

+82
-39
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group<W_Group>,
932932
def Xanalyzer : Separate<["-"], "Xanalyzer">,
933933
HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
934934
Group<StaticAnalyzer_Group>;
935-
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>;
935+
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>,
936+
HelpText<"Pass <arg> to the compilation if the target matches <arch>">,
937+
MetaVarName<"<arch> <arg>">;
936938
def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>,
937939
HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
938940
def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>,
@@ -1115,8 +1117,8 @@ def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">,
11151117

11161118
// Common offloading options
11171119
let Group = offload_Group in {
1118-
def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>,
1119-
Visibility<[ClangOption, FlangOption]>,
1120+
def offload_arch_EQ : Joined<["--"], "offload-arch=">,
1121+
Visibility<[ClangOption, FlangOption]>, Flags<[NoXarchOption]>,
11201122
HelpText<"Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). "
11211123
"If 'native' is used the compiler will detect locally installed architectures. "
11221124
"For HIP offloading, the device architecture can be followed by target ID features "

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3409,7 +3409,9 @@ class OffloadingActionBuilder final {
34093409
// Collect all offload arch parameters, removing duplicates.
34103410
std::set<StringRef> GpuArchs;
34113411
bool Error = false;
3412-
for (Arg *A : Args) {
3412+
const ToolChain &TC = *ToolChains.front();
3413+
for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"",
3414+
AssociatedOffloadKind)) {
34133415
if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
34143416
A->getOption().matches(options::OPT_no_offload_arch_EQ)))
34153417
continue;
@@ -3420,7 +3422,6 @@ class OffloadingActionBuilder final {
34203422
ArchStr == "all") {
34213423
GpuArchs.clear();
34223424
} else if (ArchStr == "native") {
3423-
const ToolChain &TC = *ToolChains.front();
34243425
auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args);
34253426
if (!GPUsOrErr) {
34263427
TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)

clang/lib/Driver/ToolChain.cpp

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,7 +1648,8 @@ void ToolChain::TranslateXarchArgs(
16481648
A->getOption().matches(options::OPT_Xarch_host))
16491649
ValuePos = 0;
16501650

1651-
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
1651+
const InputArgList &BaseArgs = Args.getBaseArgs();
1652+
unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos));
16521653
unsigned Prev = Index;
16531654
std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));
16541655

@@ -1672,8 +1673,31 @@ void ToolChain::TranslateXarchArgs(
16721673
Diags.Report(DiagID) << A->getAsString(Args);
16731674
return;
16741675
}
1676+
16751677
XarchArg->setBaseArg(A);
16761678
A = XarchArg.release();
1679+
1680+
// Linker input arguments require custom handling. The problem is that we
1681+
// have already constructed the phase actions, so we can not treat them as
1682+
// "input arguments".
1683+
if (A->getOption().hasFlag(options::LinkerInput)) {
1684+
// Convert the argument into individual Zlinker_input_args. Need to do this
1685+
// manually to avoid memory leaks with the allocated arguments.
1686+
for (const char *Value : A->getValues()) {
1687+
auto Opt = Opts.getOption(options::OPT_Zlinker_input);
1688+
unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
1689+
auto NewArg =
1690+
new Arg(Opt, BaseArgs.MakeArgString(Opt.getPrefix() + Opt.getName()),
1691+
Index, BaseArgs.getArgString(Index + 1), A);
1692+
1693+
DAL->append(NewArg);
1694+
if (!AllocatedArgs)
1695+
DAL->AddSynthesizedArg(NewArg);
1696+
else
1697+
AllocatedArgs->push_back(NewArg);
1698+
}
1699+
}
1700+
16771701
if (!AllocatedArgs)
16781702
DAL->AddSynthesizedArg(A);
16791703
else
@@ -1697,19 +1721,17 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
16971721
} else if (A->getOption().matches(options::OPT_Xarch_host)) {
16981722
NeedTrans = !IsDevice;
16991723
Skip = IsDevice;
1700-
} else if (A->getOption().matches(options::OPT_Xarch__) && IsDevice) {
1701-
// Do not translate -Xarch_ options for non CUDA/HIP toolchain since
1702-
// they may need special translation.
1703-
// Skip this argument unless the architecture matches BoundArch
1704-
if (BoundArch.empty() || A->getValue(0) != BoundArch)
1705-
Skip = true;
1706-
else
1707-
NeedTrans = true;
1724+
} else if (A->getOption().matches(options::OPT_Xarch__)) {
1725+
NeedTrans = A->getValue() == getArchName() ||
1726+
(!BoundArch.empty() && A->getValue() == BoundArch);
1727+
Skip = !NeedTrans;
17081728
}
17091729
if (NeedTrans || Skip)
17101730
Modified = true;
1711-
if (NeedTrans)
1731+
if (NeedTrans) {
1732+
A->claim();
17121733
TranslateXarchArgs(Args, A, DAL, AllocatedArgs);
1734+
}
17131735
if (!Skip)
17141736
DAL->append(A);
17151737
}

clang/lib/Driver/ToolChains/Darwin.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2777,30 +2777,6 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
27772777
// and try to push it down into tool specific logic.
27782778

27792779
for (Arg *A : Args) {
2780-
if (A->getOption().matches(options::OPT_Xarch__)) {
2781-
// Skip this argument unless the architecture matches either the toolchain
2782-
// triple arch, or the arch being bound.
2783-
StringRef XarchArch = A->getValue(0);
2784-
if (!(XarchArch == getArchName() ||
2785-
(!BoundArch.empty() && XarchArch == BoundArch)))
2786-
continue;
2787-
2788-
Arg *OriginalArg = A;
2789-
TranslateXarchArgs(Args, A, DAL);
2790-
2791-
// Linker input arguments require custom handling. The problem is that we
2792-
// have already constructed the phase actions, so we can not treat them as
2793-
// "input arguments".
2794-
if (A->getOption().hasFlag(options::LinkerInput)) {
2795-
// Convert the argument into individual Zlinker_input_args.
2796-
for (const char *Value : A->getValues()) {
2797-
DAL->AddSeparateArg(
2798-
OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
2799-
}
2800-
continue;
2801-
}
2802-
}
2803-
28042780
// Sob. This is strictly gcc compatible for the time being. Apple
28052781
// gcc translates options twice, which means that self-expanding
28062782
// options add duplicates.

clang/test/Driver/Xarch.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
// RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
2+
// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
3+
// RUN: %clang -target x86_64-unknown-windows-msvc -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
4+
// RUN: %clang -target aarch64-unknown-linux-gnu -Xarch_aarch64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
5+
// RUN: %clang -target powerpc64le-unknown-linux-gnu -Xarch_powerpc64le -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
26
// O3ONCE: "-O3"
37
// O3ONCE-NOT: "-O3"
48

59
// RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s
10+
// RUN: %clang -target x86_64-unknown-linux-gnu -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s
611
// O3NONE-NOT: "-O3"
712
// O3NONE: argument unused during compilation: '-Xarch_i386 -O3'
813

914
// RUN: not %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -o -Xarch_i386 -S %s -S -Xarch_i386 -o 2>&1 | FileCheck -check-prefix=INVALID %s
1015
// INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
1116
// INVALID: error: invalid Xarch argument: '-Xarch_i386 -S'
1217
// INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
18+
19+
// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -Wl,foo %s -### 2>&1 | FileCheck -check-prefix=LINKER %s
20+
// LINKER: "foo"

clang/test/Driver/offload-Xarch.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %clang -x cuda %s -Xarch_nvptx64 -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
2+
// RUN: %clang -x cuda %s -Xarch_device -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
3+
// RUN: %clang -x hip %s -Xarch_amdgcn -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
4+
// RUN: %clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc \
5+
// RUN: -Xarch_amdgcn -march=gfx90a -Xarch_amdgcn -O3 -S -### %s 2>&1 \
6+
// RUN: | FileCheck -check-prefix=O3ONCE %s
7+
// RUN: %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
8+
// RUN: -Xarch_nvptx64 -march=sm_52 -Xarch_nvptx64 -O3 -S -### %s 2>&1 \
9+
// RUN: | FileCheck -check-prefix=O3ONCE %s
10+
// O3ONCE: "-O3"
11+
// O3ONCE-NOT: "-O3"
12+
13+
// RUN: %clang -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -nogpulib \
14+
// RUN: --target=x86_64-unknown-linux-gnu -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60 -nogpuinc \
15+
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx90a,gfx1030 -ccc-print-bindings -### %s 2>&1 \
16+
// RUN: | FileCheck -check-prefix=OPENMP %s
17+
//
18+
// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
19+
// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX1030_BC:.+]]"
20+
// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX90A_BC:.+]]"
21+
// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM52_PTX:.+]]"
22+
// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM52_PTX]]"], output: "[[SM52_CUBIN:.+]]"
23+
// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM60_PTX:.+]]"
24+
// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM60_PTX]]"], output: "[[SM60_CUBIN:.+]]"
25+
// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[GFX1030_BC]]", "[[GFX90A_BC]]", "[[SM52_CUBIN]]", "[[SM60_CUBIN]]"], output: "[[BINARY:.+]]"
26+
// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
27+
// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
28+
29+
// RUN: %clang -x cuda %s --offload-arch=sm_52,sm_60 -Xarch_sm_52 -O3 -Xarch_sm_60 -O0 \
30+
// RUN: --target=x86_64-unknown-linux-gnu -Xarch_host -O3 -S -nogpulib -nogpuinc -### 2>&1 \
31+
// RUN: | FileCheck -check-prefix=CUDA %s
32+
// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52" {{.*}}"-O3"
33+
// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_60" {{.*}}"-O0"
34+
// CUDA: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}}"-O3"

0 commit comments

Comments
 (0)