Skip to content

[LinkerWrapper] Support relocatable linking for offloading #80066

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/test/Driver/linker-wrapper-image.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF

Expand All @@ -19,6 +21,8 @@
// OPENMP-COFF: @__start_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OA"
// OPENMP-COFF-NEXT: @__stop_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OZ"

// OPENMP-REL: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading.relocatable", align 8

// OPENMP: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8
// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr getelementptr inbounds ([[[BEGIN:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr getelementptr inbounds ([[[END:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
Expand All @@ -42,6 +46,8 @@
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF

Expand Down Expand Up @@ -140,6 +146,8 @@
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF

Expand Down
32 changes: 29 additions & 3 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,36 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
// RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: llvm-ar rcs %t.a %t.o
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld.lld -- -r --whole-archive %t.a --no-whole-archive \
// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to check device fatbin bundling is done and device wrapper fatbin variable using internal linkage and in postfixed section.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a test for the HIP and CUDA cases as well as an additional check on passing -r to the wrapper image generation.

// RELOCATABLE-LINK-NOT: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu
// RELOCATABLE-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu
// RELOCATABLE-LINK: /usr/bin/ld.lld{{.*}}-r
// RELOCATABLE-LINK: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
// RUN: --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-HIP

// RELOCATABLE-LINK-HIP: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa
// RELOCATABLE-LINK-HIP: clang-offload-bundler{{.*}} -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx90a -input=/dev/null -input={{.*}} -output={{.*}}
// RELOCATABLE-LINK-HIP: /usr/bin/ld.lld{{.*}}-r
// RELOCATABLE-LINK-HIP: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading

// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_89 \
// RUN: --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_89
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
// RUN: --linker-path=/usr/bin/ld.lld -- -r %t.o \
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA

// RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda
// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image=profile=sm_89,file={{.*}}.img
// RELOCATABLE-LINK-CUDA: /usr/bin/ld.lld{{.*}}-r
// RELOCATABLE-LINK-CUDA: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading
78 changes: 71 additions & 7 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,70 @@ Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
return *Path;
}

/// Returns the hashed value for a constant string.
std::string getHash(StringRef Str) {
llvm::MD5 Hasher;
llvm::MD5::MD5Result Hash;
Hasher.update(Str);
Hasher.final(Hash);
return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}

/// Renames offloading entry sections in a relocatable link so they do not
/// conflict with a later link job.
Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
llvm::Triple Triple(
Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
if (Triple.isOSWindows())
return createStringError(
inconvertibleErrorCode(),
"Relocatable linking is not supported on COFF targets");

Expected<std::string> ObjcopyPath =
findProgram("llvm-objcopy", {getMainExecutable("llvm-objcopy")});
if (!ObjcopyPath)
return ObjcopyPath.takeError();

// Use the linker output file to get a unique hash. This creates a unique
// identifier to rename the sections to that is deterministic to the contents.
auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer("")
: MemoryBuffer::getFileOrSTDIN(Output);
if (!BufferOrErr)
return createStringError(inconvertibleErrorCode(), "Failed to open %s",
Output.str().c_str());
std::string Suffix = "_" + getHash((*BufferOrErr)->getBuffer());

SmallVector<StringRef> ObjcopyArgs = {
*ObjcopyPath,
Output,
};

// Remove the old .llvm.offloading section to prevent further linking.
ObjcopyArgs.emplace_back("--remove-section");
ObjcopyArgs.emplace_back(".llvm.offloading");
for (StringRef Prefix : {"omp", "cuda", "hip"}) {
auto Section = (Prefix + "_offloading_entries").str();
// Rename the offloading entires to make them private to this link unit.
ObjcopyArgs.emplace_back("--rename-section");
ObjcopyArgs.emplace_back(
Args.MakeArgString(Section + "=" + Section + Suffix));

// Rename the __start_ / __stop_ symbols appropriately to iterate over the
// newly renamed section containing the offloading entries.
ObjcopyArgs.emplace_back("--redefine-sym");
ObjcopyArgs.emplace_back(Args.MakeArgString("__start_" + Section + "=" +
"__start_" + Section + Suffix));
ObjcopyArgs.emplace_back("--redefine-sym");
ObjcopyArgs.emplace_back(Args.MakeArgString("__stop_" + Section + "=" +
"__stop_" + Section + Suffix));
}

if (Error Err = executeCommands(*ObjcopyPath, ObjcopyArgs))
return Err;

return Error::success();
}

/// Runs the wrapped linker job with the newly created input.
Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
llvm::TimeTraceScope TimeScope("Execute host linker");
Expand All @@ -265,6 +329,11 @@ Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
LinkerArgs.push_back(Arg);
if (Error Err = executeCommands(LinkerPath, LinkerArgs))
return Err;

if (Args.hasArg(OPT_relocatable))
if (Error Err = relocateOffloadSection(Args, ExecutableName))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could just return relocateOffloadSection(Args, ExecutableName)

return Err;

return Error::success();
}

Expand Down Expand Up @@ -910,7 +979,8 @@ wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
case OFK_OpenMP:
if (Error Err = offloading::wrapOpenMPBinaries(
M, BuffersToWrap,
offloading::getOffloadEntryArray(M, "omp_offloading_entries")))
offloading::getOffloadEntryArray(M, "omp_offloading_entries"),
/*Suffix=*/"", /*Relocatable=*/Args.hasArg(OPT_relocatable)))
return std::move(Err);
break;
case OFK_Cuda:
Expand Down Expand Up @@ -1356,12 +1426,6 @@ Expected<SmallVector<SmallVector<OffloadFile>>>
getDeviceInput(const ArgList &Args) {
llvm::TimeTraceScope TimeScope("ExtractDeviceCode");

// If the user is requesting a reloctable link we ignore the device code. The
// actual linker will merge the embedded device code sections so they can be
// linked when the executable is finally created.
if (Args.hasArg(OPT_relocatable))
return SmallVector<SmallVector<OffloadFile>>{};

StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ);
SmallVector<StringRef> LibraryPaths;
for (const opt::Arg *Arg : Args.filtered(OPT_library_path, OPT_libpath))
Expand Down
4 changes: 3 additions & 1 deletion llvm/include/llvm/Frontend/Offloading/OffloadWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ using EntryArrayTy = std::pair<GlobalVariable *, GlobalVariable *>;
/// \param EntryArray Optional pair pointing to the `__start` and `__stop`
/// symbols holding the `__tgt_offload_entry` array.
/// \param Suffix An optional suffix appended to the emitted symbols.
/// \param Relocatable Indicate if we need to change the offloading section.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: "Indicate whether the binary is a relocatable object" may work a bit better for describing intent. Current description seems to describe an implementation detail.

llvm::Error wrapOpenMPBinaries(llvm::Module &M,
llvm::ArrayRef<llvm::ArrayRef<char>> Images,
EntryArrayTy EntryArray,
llvm::StringRef Suffix = "");
llvm::StringRef Suffix = "",
bool Relocatable = false);

/// Wraps the input fatbinary image into the module \p M as global symbols and
/// registers the images with the CUDA runtime.
Expand Down
11 changes: 7 additions & 4 deletions llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ PointerType *getBinDescPtrTy(Module &M) {
///
/// Global variable that represents BinDesc is returned.
GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
EntryArrayTy EntryArray, StringRef Suffix) {
EntryArrayTy EntryArray, StringRef Suffix,
bool Relocatable) {
LLVMContext &C = M.getContext();
auto [EntriesB, EntriesE] = EntryArray;

Expand All @@ -129,7 +130,8 @@ GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
GlobalVariable::InternalLinkage, Data,
".omp_offloading.device_image" + Suffix);
Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
Image->setSection(".llvm.offloading");
Image->setSection(Relocatable ? ".llvm.offloading.relocatable"
: ".llvm.offloading");
Image->setAlignment(Align(object::OffloadBinary::getAlignment()));

StringRef Binary(Buf.data(), Buf.size());
Expand Down Expand Up @@ -582,8 +584,9 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,

Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images,
EntryArrayTy EntryArray,
llvm::StringRef Suffix) {
GlobalVariable *Desc = createBinDesc(M, Images, EntryArray, Suffix);
llvm::StringRef Suffix, bool Relocatable) {
GlobalVariable *Desc =
createBinDesc(M, Images, EntryArray, Suffix, Relocatable);
if (!Desc)
return createStringError(inconvertibleErrorCode(),
"No binary descriptors created.");
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Object/OffloadBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Error extractFromObject(const ObjectFile &Obj,
if (!NameOrErr)
return NameOrErr.takeError();

if (!NameOrErr->equals(".llvm.offloading"))
if (!NameOrErr->starts_with(".llvm.offloading"))
continue;
}

Expand Down