Skip to content

[OpenMP][libc] Remove special handling for OpenMP printf #98940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5986,8 +5986,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
getTarget().getTriple().isAMDGCN() ||
(getTarget().getTriple().isSPIRV() &&
getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
if (getLangOpts().OpenMPIsTargetDevice)
return EmitOpenMPDevicePrintfCallExpr(E);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is EmitDevicePrintfCallExpr now dead and deletable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in this patch. It's still used for NVPTX because CUDA doesn't know it can do varargs yet. We could replace that with the same code I put in LibC.h here in one of the CUDA headers if we wanted to. OpenCL and HIP also still use the ROCm Device Libs for this.

if (getTarget().getTriple().isNVPTX())
return EmitNVPTXDevicePrintfCallExpr(E);
if ((getTarget().getTriple().isAMDGCN() ||
Expand Down
29 changes: 0 additions & 29 deletions clang/lib/CodeGen/CGGPUBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,28 +42,6 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
}

/// Looks up `__llvm_omp_vprintf` in the module owned by \p CGM and returns it,
/// declaring it with external linkage when no function of that name exists.
///
/// The expected signature is `i32 (ptr, ptr, i32)` and is not variadic. If the
/// module already holds a function with that name but a different type, an
/// error is reported through \p CGM and nullptr is returned.
llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
  llvm::Module &Mod = CGM.getModule();
  auto &Ctx = Mod.getContext();
  const char *FnName = "__llvm_omp_vprintf";

  // Build the one function type every declaration of FnName must match.
  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Ctx);
  llvm::Type *I32Ty = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *ParamTys[] = {PtrTy, PtrTy, I32Ty};
  llvm::FunctionType *ExpectedTy =
      llvm::FunctionType::get(I32Ty, ParamTys, false);

  llvm::Function *Existing = Mod.getFunction(FnName);

  // Nothing declared yet: create the external declaration in this module.
  if (!Existing)
    return llvm::Function::Create(
        ExpectedTy, llvm::GlobalVariable::ExternalLinkage, FnName, &Mod);

  // Reuse a pre-existing declaration only when its type agrees.
  if (Existing->getFunctionType() == ExpectedTy)
    return Existing;

  CGM.Error(SourceLocation(),
            "Invalid type declaration for __llvm_omp_vprintf");
  return nullptr;
}

// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
// isn't particularly special; it's invoked just like a regular function).
// vprintf takes two args: A format string, and a pointer to a buffer containing
Expand Down Expand Up @@ -213,10 +191,3 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
return RValue::get(Printf);
}

/// Emits the OpenMP device form of a printf call expression \p E by forwarding
/// to EmitDevicePrintfCallExpr with the `__llvm_omp_vprintf` declaration.
///
/// Only valid when the target triple is NVPTX or AMDGCN (asserted).
RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
  assert(getTarget().getTriple().isNVPTX() ||
         getTarget().getTriple().isAMDGCN());

  // Fetch (or create) the runtime entry point the printf call is routed to.
  llvm::Function *OmpVprintf = GetOpenMPVprintfDeclaration(CGM);

  // NOTE(review): the trailing `true` presumably asks the shared helper to
  // pass an extra size argument, matching the third i32 parameter of
  // __llvm_omp_vprintf — confirm against EmitDevicePrintfCallExpr.
  return EmitDevicePrintfCallExpr(E, this, OmpVprintf, true);
}
1 change: 0 additions & 1 deletion clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4536,7 +4536,6 @@ class CodeGenFunction : public CodeGenTypeCache {

RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E);

RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E, ReturnValueSlot ReturnValue);
Expand Down
1 change: 0 additions & 1 deletion libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ set(TARGET_LIBC_ENTRYPOINTS

# gpu/rpc.h entrypoints
libc.src.gpu.rpc_host_call
libc.src.gpu.rpc_fprintf
)

set(TARGET_LIBM_ENTRYPOINTS
Expand Down
8 changes: 0 additions & 8 deletions libc/spec/gpu_ext.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
RetValSpec<VoidType>,
[ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
>,
FunctionSpec<
"rpc_fprintf",
RetValSpec<IntType>,
[ArgSpec<FILERestrictedPtr>,
ArgSpec<ConstCharRestrictedPtr>,
ArgSpec<VoidPtr>,
ArgSpec<SizeTType>]
>,
]
>;
let Headers = [
Expand Down
12 changes: 0 additions & 12 deletions libc/src/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,3 @@ add_entrypoint_object(
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)

add_entrypoint_object(
rpc_fprintf
SRCS
rpc_fprintf.cpp
HDRS
rpc_fprintf.h
DEPENDS
libc.src.stdio.gpu.gpu_file
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)
75 changes: 0 additions & 75 deletions libc/src/gpu/rpc_fprintf.cpp

This file was deleted.

23 changes: 0 additions & 23 deletions libc/src/gpu/rpc_fprintf.h

This file was deleted.

3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,8 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
return false;

auto PrintfFunction = M.getFunction("printf");
if (!PrintfFunction || !PrintfFunction->isDeclaration())
if (!PrintfFunction || !PrintfFunction->isDeclaration() ||
M.getModuleFlag("openmp"))
return false;

for (auto &U : PrintfFunction->uses()) {
Expand Down
1 change: 0 additions & 1 deletion offload/DeviceRTL/include/LibC.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ extern "C" {

int memcmp(const void *lhs, const void *rhs, size_t count);
void memset(void *dst, int C, size_t count);

int printf(const char *format, ...);
}

Expand Down
44 changes: 14 additions & 30 deletions offload/DeviceRTL/src/LibC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,44 +11,33 @@
#pragma omp begin declare target device_type(nohost)

namespace impl {
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
int32_t omp_vprintf(const char *Format, __builtin_va_list vlist);
}

#ifndef OMPTARGET_HAS_LIBC
namespace impl {
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, \
implementation = {extension(match_any)})
extern "C" int32_t vprintf(const char *, void *);
namespace impl {
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
return vprintf(Format, Arguments);
extern "C" int vprintf(const char *format, ...);
int omp_vprintf(const char *Format, __builtin_va_list vlist) {
return vprintf(Format, vlist);
}
} // namespace impl
#pragma omp end declare variant

#pragma omp begin declare variant match(device = {arch(amdgcn)})

#ifdef OMPTARGET_HAS_LIBC
// TODO: Remove this handling once we have varargs support.
extern "C" struct FILE *stdout;
extern "C" int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t);

namespace impl {
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
return rpc_fprintf(stdout, Format, Arguments, Size);
}
int omp_vprintf(const char *Format, __builtin_va_list) { return -1; }
#pragma omp end declare variant
} // namespace impl
#else
// We do not have a vprintf implementation for AMD GPU so we use a stub.
namespace impl {
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
return -1;

extern "C" int printf(const char *Format, ...) {
__builtin_va_list vlist;
__builtin_va_start(vlist, Format);
return impl::omp_vprintf(Format, vlist);
}
} // namespace impl
#endif
#pragma omp end declare variant
#endif // OMPTARGET_HAS_LIBC

extern "C" {

[[gnu::weak]] int memcmp(const void *lhs, const void *rhs, size_t count) {
auto *L = reinterpret_cast<const unsigned char *>(lhs);
auto *R = reinterpret_cast<const unsigned char *>(rhs);
Expand All @@ -65,11 +54,6 @@ extern "C" {
for (size_t I = 0; I < count; ++I)
dstc[I] = C;
}

/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
return impl::omp_vprintf(Format, Arguments, Size);
}
}

#pragma omp end declare target
Loading