-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[OpenMP] Remove 'libomptarget.devicertl.a' fatbinary and use static library #126143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-offload @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) ChangesSummary: This patch creates two new static libraries that get installed into
for AMDGPU and NVPTX respectively. The link job created by the linker This patch is a precursor to changing the build system entirely to be a NOTE that this actually does remove an additional optimization step. Performance testing will be required. If we really need the merged blob Full diff: https://github.com/llvm/llvm-project/pull/126143.diff 4 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index c0891d46b0a62cd..fd690ab11c1c2c3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9209,6 +9209,10 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
A->render(Args, LinkerArgs);
}
+ // If this is OpenMP the device linker will need `-lomp`.
+ if (Kind == Action::OFK_OpenMP && !Args.hasArg(OPT_nogpulib))
+ LinkerArgs.emplace_back("-lomp");
+
// Forward all of these to the appropriate toolchain.
for (StringRef Arg : CompilerArgs)
CmdArgs.push_back(Args.MakeArgString(
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 699aadec86dcba9..93031d2f5302386 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1289,9 +1289,6 @@ bool tools::addOpenMPRuntime(const Compilation &C, ArgStringList &CmdArgs,
if (IsOffloadingHost)
CmdArgs.push_back("-lomptarget");
- if (IsOffloadingHost && !Args.hasArg(options::OPT_nogpulib))
- CmdArgs.push_back("-lomptarget.devicertl");
-
addArchSpecificRPath(TC, Args, CmdArgs);
addOpenMPRuntimeLibraryPath(TC, Args, CmdArgs);
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 8f2a1fd01fabcc8..b3dd4a1997d80d0 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -107,15 +107,15 @@ set(bc_flags -c -flto -std=c++17 -fvisibility=hidden
)
# first create an object target
-add_library(omptarget.devicertl.all_objs OBJECT IMPORTED)
function(compileDeviceRTLLibrary target_name target_triple)
set(target_bc_flags ${ARGN})
set(bc_files "")
+ add_library(omp.${target_name}.all_objs OBJECT IMPORTED)
foreach(src ${src_files})
get_filename_component(infile ${src} ABSOLUTE)
get_filename_component(outfile ${src} NAME)
- set(outfile "${outfile}-${target_name}.bc")
+ set(outfile "${outfile}-${target_name}.o")
set(depfile "${outfile}.d")
# Passing an empty CPU to -march= suppressed target specific metadata.
@@ -142,99 +142,36 @@ function(compileDeviceRTLLibrary target_name target_triple)
endif()
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile})
- list(APPEND bc_files ${outfile})
+ list(APPEND obj_files ${CMAKE_CURRENT_BINARY_DIR}/${outfile})
endforeach()
-
- set(bclib_name "libomptarget-${target_name}.bc")
-
- # Link to a bitcode library.
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- COMMAND ${LINK_TOOL}
- -o ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} ${bc_files}
- DEPENDS ${bc_files}
- COMMENT "Linking LLVM bitcode ${bclib_name}"
- )
-
- if(TARGET llvm-link)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- DEPENDS llvm-link
- APPEND)
- endif()
-
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- COMMAND ${OPT_TOOL} ${link_export_flag} ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- -o ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- DEPENDS ${source_directory}/exports ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- COMMENT "Internalizing LLVM bitcode ${bclib_name}"
- )
- if(TARGET opt)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- DEPENDS opt
- APPEND)
- endif()
-
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- COMMENT "Optimizing LLVM bitcode ${bclib_name}"
- )
- if(TARGET opt)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- DEPENDS opt
- APPEND)
- endif()
-
- set(bclib_target_name "omptarget-${target_name}-bc")
- add_custom_target(${bclib_target_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name})
-
- # Copy library to destination.
- add_custom_command(TARGET ${bclib_target_name} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- ${LIBOMPTARGET_LIBRARY_DIR})
- add_dependencies(omptarget.devicertl.${target_name} ${bclib_target_name})
-
- set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name} ${LIBOMPTARGET_LIBRARY_DIR}/${bclib_name})
-
- # Install bitcode library under the lib destination folder.
- install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
-
- set(target_feature "")
- if("${target_triple}" STREQUAL "nvptx64-nvidia-cuda")
- set(target_feature "feature=+ptx63")
- endif()
-
- # Package the bitcode in the bitcode and embed it in an ELF for the static library
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},${target_feature},triple=${target_triple},arch=generic,kind=openmp"
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- COMMENT "Packaging LLVM offloading binary ${bclib_name}.out"
+ set_property(TARGET omp.${target_name}.all_objs
+ APPEND PROPERTY IMPORTED_OBJECTS ${obj_files})
+
+ # Archive all the object files generated above into a static library
+ add_library(omp.${target_name} STATIC)
+ set_target_properties(omp.${target_name} PROPERTIES
+ ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/${target_triple}"
+ ARCHIVE_OUTPUT_NAME omp
+ LINKER_LANGUAGE CXX
)
- if(TARGET clang-offload-packager)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- DEPENDS clang-offload-packager
- APPEND)
- endif()
-
- set(output_name "${CMAKE_CURRENT_BINARY_DIR}/devicertl-${target_name}.o")
- add_custom_command(OUTPUT ${output_name}
- COMMAND ${CLANG_TOOL} --std=c++17 -c -nostdlib
- -Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- -o ${output_name}
- ${source_directory}/Stub.cpp
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name} ${source_directory}/Stub.cpp
- COMMENT "Embedding LLVM offloading binary in devicertl-${target_name}.o"
- VERBATIM
- )
- if(TARGET clang)
- add_custom_command(OUTPUT ${output_name}
- DEPENDS clang
- APPEND)
- endif()
-
- set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${output_name})
- set_property(TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name})
+ target_link_libraries(omp.${target_name} PRIVATE omp.${target_name}.all_objs)
+
+ install(TARGETS omp.${target_name}
+ ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/${target_triple}")
+
+ # Trick to combine these into a bitcode file via the linker's LTO pass. This
+ # is used to provide the legacy `libomptarget-<name>.bc` files.
+ add_executable(libomptarget-${target_name} ${obj_files})
+ set_target_properties(libomptarget-${target_name} PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY ${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}
+ LINKER_LANGUAGE CXX
+ RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc)
+ target_compile_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}")
+ target_link_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}"
+ "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
+ install(TARGETS libomptarget-${target_name}
+ PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
+ DESTINATION ${OFFLOAD_INSTALL_LIBDIR})
if (CMAKE_EXPORT_COMPILE_COMMANDS)
set(ide_target_name omptarget-ide-${target_name})
@@ -259,13 +196,3 @@ compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=n
add_custom_target(omptarget.devicertl.nvptx)
compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
-
-# Archive all the object files generated above into a static library
-add_library(omptarget.devicertl STATIC)
-set_target_properties(omptarget.devicertl PROPERTIES
- ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}"
- LINKER_LANGUAGE CXX
-)
-target_link_libraries(omptarget.devicertl PRIVATE omptarget.devicertl.all_objs)
-
-install(TARGETS omptarget.devicertl ARCHIVE DESTINATION ${OFFLOAD_INSTALL_LIBDIR})
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 658ae5f9653ba90..565edc3e7faeb9d 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -183,11 +183,11 @@ def remove_suffix_if_present(name):
def add_libraries(source):
if config.libomptarget_has_libc:
- return source + " -Xoffload-linker " + "-lc " + \
- "-Xoffload-linker " + "-lm " + \
- config.llvm_library_intdir + "/libomptarget.devicertl.a"
+ return source + " -Xoffload-linker -lc " + \
+ "-Xoffload-linker -lm " + \
+ "-Xoffload-linker -lomp "
else:
- return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"
+ return source + " " + "-Xoffload-lnker -lomp"
# Add platform targets
host_targets = [
|
@llvm/pr-subscribers-clang-driver Author: Joseph Huber (jhuber6) ChangesSummary: This patch creates two new static libraries that get installed into
for AMDGPU and NVPTX respectively. The link job created by the linker This patch is a precursor to changing the build system entirely to be a NOTE that this actually does remove an additional optimization step. Performance testing will be required. If we really need the merged blob Full diff: https://github.com/llvm/llvm-project/pull/126143.diff 4 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index c0891d46b0a62cd..fd690ab11c1c2c3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9209,6 +9209,10 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
A->render(Args, LinkerArgs);
}
+ // If this is OpenMP the device linker will need `-lomp`.
+ if (Kind == Action::OFK_OpenMP && !Args.hasArg(OPT_nogpulib))
+ LinkerArgs.emplace_back("-lomp");
+
// Forward all of these to the appropriate toolchain.
for (StringRef Arg : CompilerArgs)
CmdArgs.push_back(Args.MakeArgString(
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 699aadec86dcba9..93031d2f5302386 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1289,9 +1289,6 @@ bool tools::addOpenMPRuntime(const Compilation &C, ArgStringList &CmdArgs,
if (IsOffloadingHost)
CmdArgs.push_back("-lomptarget");
- if (IsOffloadingHost && !Args.hasArg(options::OPT_nogpulib))
- CmdArgs.push_back("-lomptarget.devicertl");
-
addArchSpecificRPath(TC, Args, CmdArgs);
addOpenMPRuntimeLibraryPath(TC, Args, CmdArgs);
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 8f2a1fd01fabcc8..b3dd4a1997d80d0 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -107,15 +107,15 @@ set(bc_flags -c -flto -std=c++17 -fvisibility=hidden
)
# first create an object target
-add_library(omptarget.devicertl.all_objs OBJECT IMPORTED)
function(compileDeviceRTLLibrary target_name target_triple)
set(target_bc_flags ${ARGN})
set(bc_files "")
+ add_library(omp.${target_name}.all_objs OBJECT IMPORTED)
foreach(src ${src_files})
get_filename_component(infile ${src} ABSOLUTE)
get_filename_component(outfile ${src} NAME)
- set(outfile "${outfile}-${target_name}.bc")
+ set(outfile "${outfile}-${target_name}.o")
set(depfile "${outfile}.d")
# Passing an empty CPU to -march= suppressed target specific metadata.
@@ -142,99 +142,36 @@ function(compileDeviceRTLLibrary target_name target_triple)
endif()
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile})
- list(APPEND bc_files ${outfile})
+ list(APPEND obj_files ${CMAKE_CURRENT_BINARY_DIR}/${outfile})
endforeach()
-
- set(bclib_name "libomptarget-${target_name}.bc")
-
- # Link to a bitcode library.
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- COMMAND ${LINK_TOOL}
- -o ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} ${bc_files}
- DEPENDS ${bc_files}
- COMMENT "Linking LLVM bitcode ${bclib_name}"
- )
-
- if(TARGET llvm-link)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- DEPENDS llvm-link
- APPEND)
- endif()
-
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- COMMAND ${OPT_TOOL} ${link_export_flag} ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- -o ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- DEPENDS ${source_directory}/exports ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
- COMMENT "Internalizing LLVM bitcode ${bclib_name}"
- )
- if(TARGET opt)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- DEPENDS opt
- APPEND)
- endif()
-
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
- COMMENT "Optimizing LLVM bitcode ${bclib_name}"
- )
- if(TARGET opt)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- DEPENDS opt
- APPEND)
- endif()
-
- set(bclib_target_name "omptarget-${target_name}-bc")
- add_custom_target(${bclib_target_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name})
-
- # Copy library to destination.
- add_custom_command(TARGET ${bclib_target_name} POST_BUILD
- COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- ${LIBOMPTARGET_LIBRARY_DIR})
- add_dependencies(omptarget.devicertl.${target_name} ${bclib_target_name})
-
- set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name} ${LIBOMPTARGET_LIBRARY_DIR}/${bclib_name})
-
- # Install bitcode library under the lib destination folder.
- install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
-
- set(target_feature "")
- if("${target_triple}" STREQUAL "nvptx64-nvidia-cuda")
- set(target_feature "feature=+ptx63")
- endif()
-
- # Package the bitcode in the bitcode and embed it in an ELF for the static library
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},${target_feature},triple=${target_triple},arch=generic,kind=openmp"
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
- COMMENT "Packaging LLVM offloading binary ${bclib_name}.out"
+ set_property(TARGET omp.${target_name}.all_objs
+ APPEND PROPERTY IMPORTED_OBJECTS ${obj_files})
+
+ # Archive all the object files generated above into a static library
+ add_library(omp.${target_name} STATIC)
+ set_target_properties(omp.${target_name} PROPERTIES
+ ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/${target_triple}"
+ ARCHIVE_OUTPUT_NAME omp
+ LINKER_LANGUAGE CXX
)
- if(TARGET clang-offload-packager)
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- DEPENDS clang-offload-packager
- APPEND)
- endif()
-
- set(output_name "${CMAKE_CURRENT_BINARY_DIR}/devicertl-${target_name}.o")
- add_custom_command(OUTPUT ${output_name}
- COMMAND ${CLANG_TOOL} --std=c++17 -c -nostdlib
- -Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- -o ${output_name}
- ${source_directory}/Stub.cpp
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name} ${source_directory}/Stub.cpp
- COMMENT "Embedding LLVM offloading binary in devicertl-${target_name}.o"
- VERBATIM
- )
- if(TARGET clang)
- add_custom_command(OUTPUT ${output_name}
- DEPENDS clang
- APPEND)
- endif()
-
- set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${output_name})
- set_property(TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name})
+ target_link_libraries(omp.${target_name} PRIVATE omp.${target_name}.all_objs)
+
+ install(TARGETS omp.${target_name}
+ ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}/${target_triple}")
+
+ # Trick to combine these into a bitcode file via the linker's LTO pass. This
+ # is used to provide the legacy `libomptarget-<name>.bc` files.
+ add_executable(libomptarget-${target_name} ${obj_files})
+ set_target_properties(libomptarget-${target_name} PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY ${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}
+ LINKER_LANGUAGE CXX
+ RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc)
+ target_compile_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}")
+ target_link_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}"
+ "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
+ install(TARGETS libomptarget-${target_name}
+ PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
+ DESTINATION ${OFFLOAD_INSTALL_LIBDIR})
if (CMAKE_EXPORT_COMPILE_COMMANDS)
set(ide_target_name omptarget-ide-${target_name})
@@ -259,13 +196,3 @@ compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=n
add_custom_target(omptarget.devicertl.nvptx)
compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
-
-# Archive all the object files generated above into a static library
-add_library(omptarget.devicertl STATIC)
-set_target_properties(omptarget.devicertl PROPERTIES
- ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}"
- LINKER_LANGUAGE CXX
-)
-target_link_libraries(omptarget.devicertl PRIVATE omptarget.devicertl.all_objs)
-
-install(TARGETS omptarget.devicertl ARCHIVE DESTINATION ${OFFLOAD_INSTALL_LIBDIR})
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 658ae5f9653ba90..565edc3e7faeb9d 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -183,11 +183,11 @@ def remove_suffix_if_present(name):
def add_libraries(source):
if config.libomptarget_has_libc:
- return source + " -Xoffload-linker " + "-lc " + \
- "-Xoffload-linker " + "-lm " + \
- config.llvm_library_intdir + "/libomptarget.devicertl.a"
+ return source + " -Xoffload-linker -lc " + \
+ "-Xoffload-linker -lm " + \
+ "-Xoffload-linker -lomp "
else:
- return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"
+ return source + " " + "-Xoffload-lnker -lomp"
# Add platform targets
host_targets = [
|
ea890db
to
cda46da
Compare
I'm not sure if calling it |
I second @shiltian's concern. Host-side and device-side libraries should have different names[1] if there are differences between them. We have host-offloading as well. Footnotes
|
Sure, I can name it something else. Maybe we could just call it |
We already have a host-side
|
We can have conflicting names since they're in separate install directories. I'd like this to have a more 'standard' name if possible, and I feel putting a |
Should it be named as |
I don't want |
I figured that calling it |
e3962f9
to
4d325d6
Compare
+1, though as @jhuber6 mentioned, we did name it prefix with |
Clang was built from the same commit id (ee4c8b5). |
How did you disable it? Perhaps it's failing because of the specific error:
For comparison, |
I just set
Can't decide if we should indicate why it failed, maybe add a verbose mode or something. |
I'm sorry, I've unmerged these packages already after trying an older commit. Lemme build them again. |
Sorry, didn't notice this sentence. Well, I am building with
To be honest, this only proves my point — you shouldn't be invoking |
Hence why the follow-up to this will move it to a separate build where you pass https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER_TARGET.html or the LLVM runtimes target. Right now I guess it's in an awful worst-of-both-worlds state, but I was hoping to get the functional change to the generated code done so I could just do the infrastructure change. That lets you actually invoke I think for now if we just pass |
Yeah, appending |
Is this a functional work-around for now? diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index cce360236960..277ad9816411 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -132,7 +132,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
BUILD_RPATH ""
INSTALL_RPATH ""
RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc)
- target_compile_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}")
+ target_compile_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}" "-march=''")
target_link_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}"
"-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
install(TARGETS libomptarget-${target_name} |
No, I'm afraid that didn't change anything. However, it did if I added it to That said, you want to instead: --- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -132,7 +132,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
BUILD_RPATH ""
INSTALL_RPATH ""
RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc)
- target_compile_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}")
+ target_compile_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}" "-march=")
target_link_options(libomptarget-${target_name} PRIVATE "--target=${target_triple}"
- "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
+ "-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm" "-march=")
install(TARGETS libomptarget-${target_name} without the |
Alright, thanks. Do you want to make a PR for that? Sorry this is a little disruptive but I think this is long overdue. After I land the follow up you'll need to do a separate standalone build for the GPU portions, since they'd be considered different cross-compiling libraries. It's a much more straightforward model I'm trying to move all the existing GPU runtimes to. |
Yeah, I can do that. I suppose a separate build would be cleaner in the long run — and hopefully it will let us build the CPU part cleanly with GCC xP. |
Unset `-march` when invoking the compiler and linker to build the GPU libraries. These libraries use GPU targets rather than the CPU targets, and an incidental `-march=native` causes Clang to try to detect the GPU in use — which causes the build to fail when there is no GPU available. Resetting `-march=` should suffice to revert to building generic code for the time being. See the discussion in: llvm#126143 (comment)
Unset `-march` when invoking the compiler and linker to build the GPU libraries. These libraries use GPU targets rather than the CPU targets, and an incidental `-march=native` causes Clang to try to detect the GPU in use — which causes the build to fail when there is no GPU available. Resetting `-march=` should suffice to revert to building generic code for the time being. See the discussion in: #126143 (comment)
A naive question from someone who is not familiar with this area: Is any of this stuff usable with anything but a matching version of clang? If no, can we place these things in the clang resource directory, where the other version-bound runtimes live? |
It's not intended, since we tend to use clang features as we add them. My understanding is that language runtimes go in the normal |
|
||
add_library(omptarget.${target_name}.all_objs OBJECT IMPORTED) | ||
set_property(TARGET omptarget.${target_name}.all_objs APPEND PROPERTY IMPORTED_OBJECTS | ||
${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}/libomptarget-${target_name}.bc) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Getting
make[5]: *** No rule to make target '/vast/users/yeluo/opt/llvm-clang/build_mirror_offload_nightly/lib/libomptarget-amdgpu.bc', needed by '/vast/users/yeluo/opt/llvm-clang/build_mirror_offload_nightly/lib/amdgcn-amd-amdhsa/libompdevice.a'. Stop.
make[4]: *** [CMakeFiles/Makefile2:17388: offload/DeviceRTL/CMakeFiles/omptarget.amdgpu.dir/all] Error 2
Target files produced within the project (libomptarget-amdgpu.bc in this case) cannot be used as imported objects, because such files may not exist yet when they are needed — in this case by the omptarget.amdgpu
target.
…442) Unset `-march` when invoking the compiler and linker to build the GPU libraries. These libraries use GPU targets rather than the CPU targets, and an incidental `-march=native` causes Clang to be able to determine the GPU used — which causes the build to fail when there is no GPU available. Resetting `-march=` should suffice to revert to building generic code for the time being. See the discussion in: llvm/llvm-project#126143 (comment)
…ibrary (llvm#126143) Summary: Currently, we build a single `libomptarget.devicertl.a` which is a fatbinary. It is a host object file that contains the embedded archive files for both the NVIDIA and AMDGPU targets. This was done primarily as a convenience due to naming conflicts. Now that the clang driver for the GPU targets can appropriate link via the per-target runtime-dir, we can just make two separate static libraries and remove the indirection. This patch creates two new static libraries that get installed into ``` /lib/amdgcn-amd-amdhsa/libomp.a /lib/nvptx64-nvidia-cuda/libomp.a ``` for AMDGPU and NVPTX respectively. The link job created by the linker wrapper now simply needs to do `-lomp` and it will search those directories and link those static libraries. This requires far less special handling. This patch is a precursor to changing the build system entirely to be a runtimes based one. Soon this target will be a standard `add_library` and done through the GPU runtime targets. NOTE that this actually does remove an additional optimization step. Previously we merged all of the files into a single bitcode object and forcibly internalized some definitions. This, instead, just treats them like a normal static library. This may possibly affect performance for some files, but I think it's better overall to use static library semantics because it allows us to have an 'include-what-you-use' relationship with the library. Performance testing will be required. If we really need the merged blob then we can simply pack that into a new static library.
Unset `-march` when invoking the compiler and linker to build the GPU libraries. These libraries use GPU targets rather than the CPU targets, and an incidental `-march=native` causes Clang to be able to determine the GPU used — which causes the build to fail when there is no GPU available. Resetting `-march=` should suffice to revert to building generic code for the time being. See the discussion in: llvm#126143 (comment)
Unset `-march` when invoking the compiler and linker to build the GPU libraries. These libraries use GPU targets rather than the CPU targets, and an incidental `-march=native` causes Clang to be able to determine the GPU used — which causes the build to fail when there is no GPU available. Resetting `-march=` should suffice to revert to building generic code for the time being. See the discussion in: llvm#126143 (comment)
…ibrary (llvm#126143) Summary: Currently, we build a single `libomptarget.devicertl.a` which is a fatbinary. It is a host object file that contains the embedded archive files for both the NVIDIA and AMDGPU targets. This was done primarily as a convenience due to naming conflicts. Now that the clang driver for the GPU targets can appropriate link via the per-target runtime-dir, we can just make two separate static libraries and remove the indirection. This patch creates two new static libraries that get installed into ``` /lib/amdgcn-amd-amdhsa/libomp.a /lib/nvptx64-nvidia-cuda/libomp.a ``` for AMDGPU and NVPTX respectively. The link job created by the linker wrapper now simply needs to do `-lomp` and it will search those directories and link those static libraries. This requires far less special handling. This patch is a precursor to changing the build system entirely to be a runtimes based one. Soon this target will be a standard `add_library` and done through the GPU runtime targets. NOTE that this actually does remove an additional optimization step. Previously we merged all of the files into a single bitcode object and forcibly internalized some definitions. This, instead, just treats them like a normal static library. This may possibly affect performance for some files, but I think it's better overall to use static library semantics because it allows us to have an 'include-what-you-use' relationship with the library. Performance testing will be required. If we really need the merged blob then we can simply pack that into a new static library.
Unset `-march` when invoking the compiler and linker to build the GPU libraries. These libraries use GPU targets rather than the CPU targets, and an incidental `-march=native` causes Clang to be able to determine the GPU used — which causes the build to fail when there is no GPU available. Resetting `-march=` should suffice to revert to building generic code for the time being. See the discussion in: llvm#126143 (comment)
Summary:
Currently, we build a single
libomptarget.devicertl.a
which is a fatbinary. It is a host object file that contains the embedded archive
files for both the NVIDIA and AMDGPU targets. This was done primarily as
a convenience due to naming conflicts. Now that the clang driver for the
GPU targets can appropriately link via the per-target runtime-dir, we can
just make two separate static libraries and remove the indirection.
This patch creates two new static libraries that get installed into
for AMDGPU and NVPTX respectively. The link job created by the linker
wrapper now simply needs to do
-lomp
and it will search those directories and link those static libraries. This requires far less
special handling.
This patch is a precursor to changing the build system entirely to be a
runtimes based one. Soon this target will be a standard
add_library
and done through the GPU runtime targets.
NOTE that this actually does remove an additional optimization step.
Previously we merged all of the files into a single bitcode object and
forcibly internalized some definitions. This, instead, just treats them
like a normal static library. This may possibly affect performance for
some files, but I think it's better overall to use static library
semantics because it allows us to have an 'include-what-you-use'
relationship with the library.
Performance testing will be required. If we really need the merged blob
then we can simply pack that into a new static library.