Skip to content

[SYCL][RTC] Initial support for device globals #16565

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
862c187
[SYCL][RTC] Initial support for device globals
jopperm Jan 9, 2025
e4ef41c
Add `ext_oneapi_has_device_global`.
jopperm Jan 10, 2025
d45415d
Add get_address, get_size, and attempt to design a copy-API.
jopperm Jan 13, 2025
e6d205b
Add symbols to dump (linux only for now)
jopperm Jan 13, 2025
4d0d08d
Drop proposed copy methods on kernel_bundle
jopperm Jan 13, 2025
03c24ca
Add proper device check
jopperm Jan 13, 2025
00dd1b2
Return USM pointer from ext_oneapi_get_device_global_address
jopperm Jan 13, 2025
1aaa3b8
Add missing methods on kernel_bundle
jopperm Jan 13, 2025
4b7dd5c
Add test for device_image_scope globals.
jopperm Jan 13, 2025
1f57684
Windows symbols
jopperm Jan 14, 2025
afba729
Move device global tests to separate file and mark unsupported on ope…
jopperm Jan 15, 2025
d1177c8
Bump sycl.hpp counter for added RTC test.
jopperm Jan 15, 2025
e96e7e0
Use bundle's context for adhoc queue.
jopperm Jan 15, 2025
5c529aa
Merge remote-tracking branch 'upstream/sycl' into rtc-devglobs
jopperm Jan 16, 2025
dd081f6
Fix unused variable in structured bindings for old GCC
jopperm Jan 16, 2025
e5df76d
Use unordered set
jopperm Jan 24, 2025
104123e
Merge remote-tracking branch 'upstream/sycl' into rtc-devglobs
jopperm Jan 24, 2025
880b5ad
Bump sycl.hpp counter again
jopperm Jan 24, 2025
a8868fb
Address feedback.
jopperm Jan 28, 2025
7ec7639
Merge remote-tracking branch 'upstream/sycl' into rtc-devglobs
jopperm Jan 28, 2025
e2e0cc7
Update REQUIRES and UNSUPPORTED tags in test
jopperm Jan 28, 2025
2ea6266
Merge remote-tracking branch 'upstream/sycl' into rtc-devglobs
jopperm Feb 28, 2025
6409766
Untangle destruction of device global map entries
jopperm Mar 12, 2025
ae8f617
Merge remote-tracking branch 'upstream/sycl' into rtc-devglobs
jopperm Mar 12, 2025
92dce3d
Drop device from queries
jopperm Mar 12, 2025
265eee1
Revert format change
jopperm Mar 12, 2025
a0ab174
Windows symbols
jopperm Mar 12, 2025
62d223b
Update sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp
jopperm Mar 12, 2025
1be7568
Fail a bit earlier and more consistently
jopperm Mar 12, 2025
46261f0
Merge remote-tracking branch 'upstream/sycl' into rtc-devglobs
jopperm Mar 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,8 +657,9 @@ jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
/*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints);
assert(Splitter->hasMoreSplits());

// TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall
// be processed.
if (auto Err = Splitter->verifyNoCrossModuleDeviceGlobalUsage()) {
return std::move(Err);
}

SmallVector<RTCDevImgInfo> DevImgInfoVec;
SmallVector<std::unique_ptr<llvm::Module>> Modules;
Expand Down Expand Up @@ -698,7 +699,7 @@ jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
GlobalBinImageProps PropReq{
/*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true,
/*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true,
/*DeviceGlobals=*/false};
/*DeviceGlobals=*/true};
PropertySetRegistry Properties =
computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq);
// TODO: Manually add `compile_target` property as in
Expand Down
55 changes: 55 additions & 0 deletions sycl/include/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,19 @@ class __SYCL_EXPORT kernel_bundle_plain {
ext_oneapi_get_raw_kernel_name(detail::string_view{name}).c_str()};
}

bool ext_oneapi_has_device_global(const std::string &name) {
return ext_oneapi_has_device_global(detail::string_view{name});
}

void *ext_oneapi_get_device_global_address(const std::string &name,
const device &dev) {
return ext_oneapi_get_device_global_address(detail::string_view{name}, dev);
}

size_t ext_oneapi_get_device_global_size(const std::string &name) {
return ext_oneapi_get_device_global_size(detail::string_view{name});
}

protected:
// \returns a kernel object which represents the kernel identified by
// kernel_id passed
Expand Down Expand Up @@ -271,6 +284,11 @@ class __SYCL_EXPORT kernel_bundle_plain {
bool ext_oneapi_has_kernel(detail::string_view name);
kernel ext_oneapi_get_kernel(detail::string_view name);
detail::string ext_oneapi_get_raw_kernel_name(detail::string_view name);

bool ext_oneapi_has_device_global(detail::string_view name);
void *ext_oneapi_get_device_global_address(detail::string_view name,
const device &dev);
size_t ext_oneapi_get_device_global_size(detail::string_view name);
};

} // namespace detail
Expand Down Expand Up @@ -501,6 +519,43 @@ class kernel_bundle : public detail::kernel_bundle_plain,
return detail::kernel_bundle_plain::ext_oneapi_get_raw_kernel_name(name);
}

/////////////////////////
// ext_oneapi_has_device_global
// only true if kernel_bundle was created from source and has this device
// global
/////////////////////////
template <bundle_state _State = State,
typename = std::enable_if_t<_State == bundle_state::executable>>
bool ext_oneapi_has_device_global(const std::string &name) {
return detail::kernel_bundle_plain::ext_oneapi_has_device_global(name);
}

/////////////////////////
// ext_oneapi_get_device_global_address
// kernel_bundle must be created from source, throws if bundle was not built
// for this device, or device global is either not present or has
// `device_image_scope` property.
// Returns a USM pointer to the variable's initialized storage on the device.
/////////////////////////
template <bundle_state _State = State,
typename = std::enable_if_t<_State == bundle_state::executable>>
void *ext_oneapi_get_device_global_address(const std::string &name,
const device &dev) {
return detail::kernel_bundle_plain::ext_oneapi_get_device_global_address(
name, dev);
}

/////////////////////////
// ext_oneapi_get_device_global_size
// kernel_bundle must be created from source, throws if device global is not
// present. Returns the variable's size in bytes.
/////////////////////////
template <bundle_state _State = State,
typename = std::enable_if_t<_State == bundle_state::executable>>
size_t ext_oneapi_get_device_global_size(const std::string &name) {
return detail::kernel_bundle_plain::ext_oneapi_get_device_global_size(name);
}

private:
kernel_bundle(detail::KernelBundleImplPtr Impl)
: kernel_bundle_plain(std::move(Impl)) {}
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ void context_impl::addAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
MAssociatedDeviceGlobals.insert(DeviceGlobalPtr);
}

void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
std::lock_guard<std::mutex> Lock{MAssociatedDeviceGlobalsMutex};
MAssociatedDeviceGlobals.erase(DeviceGlobalPtr);
}

void context_impl::addDeviceGlobalInitializer(
ur_program_handle_t Program, const std::vector<device> &Devs,
const RTDeviceBinaryImage *BinImage) {
Expand Down Expand Up @@ -407,6 +412,9 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
// Device global map entry pointers will not die before the end of the
// program and the pointers will stay the same, so we do not need
// m_DeviceGlobalsMutex here.
// The lifetimes of device global map entries representing globals in
// runtime-compiled code will be tied to the kernel bundle, so the
// assumption holds in that setting as well.
for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) {
// Get or allocate the USM memory associated with the device global.
DeviceGlobalUSMMem &DeviceGlobalUSM =
Expand Down
3 changes: 3 additions & 0 deletions sycl/source/detail/context_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ class context_impl {
/// Adds an associated device global to the tracked associates.
void addAssociatedDeviceGlobal(const void *DeviceGlobalPtr);

/// Removes an associated device global from the tracked associates.
void removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr);

/// Adds a device global initializer.
void addDeviceGlobalInitializer(ur_program_handle_t Program,
const std::vector<device> &Devs,
Expand Down
6 changes: 5 additions & 1 deletion sycl/source/detail/jit_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1170,14 +1170,18 @@ sycl_device_binaries jit_compiler::createDeviceBinaries(
}

for (const auto &FPS : DevImgInfo.Properties) {
bool IsDeviceGlobalsPropSet =
FPS.Name == __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS;
PropertySetContainer PropSet{FPS.Name.c_str()};
for (const auto &FPV : FPS.Values) {
if (FPV.IsUIntValue) {
PropSet.addProperty(
PropertyContainer{FPV.Name.c_str(), FPV.UIntValue});
} else {
std::string PrefixedName =
(IsDeviceGlobalsPropSet ? Prefix : "") + FPV.Name.c_str();
PropSet.addProperty(PropertyContainer{
FPV.Name.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(),
PrefixedName.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(),
sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY});
}
}
Expand Down
145 changes: 142 additions & 3 deletions sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_set>
#include <vector>

#include "split_string.hpp"
Expand Down Expand Up @@ -383,6 +384,8 @@ class kernel_bundle_impl {
const std::vector<kernel_id> &KernelIDs,
std::vector<std::string> &&KernelNames,
std::unordered_map<std::string, std::string> &&MangledKernelNames,
std::vector<std::string> &&DeviceGlobalNames,
std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
sycl_device_binaries Binaries, std::string &&Prefix,
syclex::source_language Lang)
: kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
Expand All @@ -396,6 +399,8 @@ class kernel_bundle_impl {
MIsInterop = true;
MKernelNames = std::move(KernelNames);
MMangledKernelNames = std::move(MangledKernelNames);
MDeviceGlobalNames = std::move(DeviceGlobalNames);
MDeviceGlobalAllocations = std::move(DeviceGlobalAllocations);
MDeviceBinaries = Binaries;
MPrefix = std::move(Prefix);
MLanguage = Lang;
Expand Down Expand Up @@ -546,6 +551,12 @@ class kernel_bundle_impl {
std::vector<kernel_id> KernelIDs;
std::vector<std::string> KernelNames;
std::unordered_map<std::string, std::string> MangledKernelNames;

std::unordered_set<std::string> DeviceGlobalIDSet;
std::vector<std::string> DeviceGlobalIDVec;
std::vector<std::string> DeviceGlobalNames;
std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;

for (const auto &KernelID : PM.getAllSYCLKernelIDs()) {
std::string_view KernelName{KernelID.get_name()};
if (KernelName.find(Prefix) == 0) {
Expand All @@ -563,8 +574,8 @@ class kernel_bundle_impl {
}
}

// Apply frontend information.
for (const auto *RawImg : PM.getRawDeviceImages(KernelIDs)) {
// Mangled names.
for (const sycl_device_binary_property &RKProp :
RawImg->getRegisteredKernels()) {

Expand All @@ -574,11 +585,49 @@ class kernel_bundle_impl {
reinterpret_cast<const char *>(BA.begin()), MangledNameLen};
MangledKernelNames.emplace(RKProp->Name, MangledName);
}

// Device globals.
for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) {
std::string_view DeviceGlobalName{DeviceGlobalProp->Name};
assert(DeviceGlobalName.find(Prefix) == 0);
bool Inserted = false;
std::tie(std::ignore, Inserted) =
DeviceGlobalIDSet.emplace(DeviceGlobalName);
if (Inserted) {
DeviceGlobalIDVec.emplace_back(DeviceGlobalName);
DeviceGlobalName.remove_prefix(Prefix.length());
DeviceGlobalNames.emplace_back(DeviceGlobalName);
}
}
}

// Device globals are usually statically allocated and registered in the
// integration footer, which we don't have in the RTC context. Instead, we
// dynamically allocate storage tied to the executable kernel bundle.
for (DeviceGlobalMapEntry *DeviceGlobalEntry :
PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) {

size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value
if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) {
// Consider storage for device USM pointer.
AllocSize += sizeof(void *);
}
auto Alloc = std::make_unique<std::byte[]>(AllocSize);
std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId};
PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data());
DeviceGlobalAllocations.push_back(std::move(Alloc));

// Drop the RTC prefix from the entry's symbol name. Note that the PM
// still manages this device global under its prefixed name.
assert(DeviceGlobalName.find(Prefix) == 0);
DeviceGlobalName.remove_prefix(Prefix.length());
DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
}

return std::make_shared<kernel_bundle_impl>(
MContext, MDevices, KernelIDs, std::move(KernelNames),
std::move(MangledKernelNames), Binaries, std::move(Prefix),
std::move(MangledKernelNames), std::move(DeviceGlobalNames),
std::move(DeviceGlobalAllocations), Binaries, std::move(Prefix),
MLanguage);
}

Expand Down Expand Up @@ -680,6 +729,8 @@ class kernel_bundle_impl {
KernelNames, MLanguage);
}

// Utility methods for kernel_compiler functionality
private:
std::string adjust_kernel_name(const std::string &Name) {
if (MLanguage == syclex::source_language::sycl) {
auto It = MMangledKernelNames.find(Name);
Expand All @@ -694,8 +745,58 @@ class kernel_bundle_impl {
MKernelNames.end();
}

std::string mangle_device_global_name(const std::string &Name) {
// TODO: Support device globals declared in namespaces.
return "_Z" + std::to_string(Name.length()) + Name;
}

DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name) {
if (MKernelNames.empty() || MLanguage != syclex::source_language::sycl) {
throw sycl::exception(make_error_code(errc::invalid),
"Querying device globals by name is only available "
"in kernel_bundles successfully built from "
"kernel_bundle<bundle_state>::ext_oneapi_source> "
"with 'sycl' source language.");
}

if (!ext_oneapi_has_device_global(Name)) {
throw sycl::exception(make_error_code(errc::invalid),
"device global '" + Name +
"' not found in kernel_bundle");
}

std::vector<DeviceGlobalMapEntry *> Entries =
ProgramManager::getInstance().getDeviceGlobalEntries(
{MPrefix + mangle_device_global_name(Name)});
assert(Entries.size() == 1);
return Entries.front();
}

void unregister_device_globals_from_context() {
if (MDeviceGlobalNames.empty())
return;

// Manually trigger the release of resources for all device global map
// entries associated with this runtime-compiled bundle. Normally, this
// would happen in `~context_impl()`, however in the RTC setting, the
// context outlives the DG map entries owned by the program manager.

std::vector<std::string> DeviceGlobalIDs;
std::transform(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(),
std::back_inserter(DeviceGlobalIDs),
[&](const std::string &DGName) { return MPrefix + DGName; });
auto ContextImpl = getSyclObjImpl(MContext);
for (DeviceGlobalMapEntry *Entry :
ProgramManager::getInstance().getDeviceGlobalEntries(
DeviceGlobalIDs)) {
Entry->removeAssociatedResources(ContextImpl.get());
ContextImpl->removeAssociatedDeviceGlobal(Entry->MDeviceGlobalPtr);
}
}

public:
bool ext_oneapi_has_kernel(const std::string &Name) {
return is_kernel_name(adjust_kernel_name(Name));
return !MKernelNames.empty() && is_kernel_name(adjust_kernel_name(Name));
}

kernel
Expand Down Expand Up @@ -768,6 +869,41 @@ class kernel_bundle_impl {
return AdjustedName;
}

bool ext_oneapi_has_device_global(const std::string &Name) {
return !MDeviceGlobalNames.empty() &&
std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(),
mangle_device_global_name(Name)) !=
MDeviceGlobalNames.end();
}

void *ext_oneapi_get_device_global_address(const std::string &Name,
const device &Dev) {
DeviceGlobalMapEntry *Entry = get_device_global_entry(Name);

if (std::find(MDevices.begin(), MDevices.end(), Dev) == MDevices.end()) {
throw sycl::exception(make_error_code(errc::invalid),
"kernel_bundle not built for device");
}

if (Entry->MIsDeviceImageScopeDecorated) {
throw sycl::exception(make_error_code(errc::invalid),
"Cannot query USM pointer for device global with "
"'device_image_scope' property");
}

// TODO: Add context-only initialization via `urUSMContextMemcpyExp` instead
// of using a throw-away queue.
queue InitQueue{MContext, Dev};
auto &USMMem =
Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue));
InitQueue.wait_and_throw();
return USMMem.getPtr();
}

size_t ext_oneapi_get_device_global_size(const std::string &Name) {
return get_device_global_entry(Name)->MDeviceGlobalTSize;
}

bool empty() const noexcept { return MDeviceImages.empty(); }

backend get_backend() const noexcept {
Expand Down Expand Up @@ -999,6 +1135,7 @@ class kernel_bundle_impl {
~kernel_bundle_impl() {
try {
if (MDeviceBinaries) {
unregister_device_globals_from_context();
ProgramManager::getInstance().removeImages(MDeviceBinaries);
syclex::detail::SYCL_JIT_Destroy(MDeviceBinaries);
}
Expand Down Expand Up @@ -1039,6 +1176,8 @@ class kernel_bundle_impl {
// only kernel_bundles created from source have KernelNames member.
std::vector<std::string> MKernelNames;
std::unordered_map<std::string, std::string> MMangledKernelNames;
std::vector<std::string> MDeviceGlobalNames;
std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
sycl_device_binaries MDeviceBinaries = nullptr;
std::string MPrefix;
include_pairs_t MIncludePairs;
Expand Down
15 changes: 15 additions & 0 deletions sycl/source/kernel_bundle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,21 @@ kernel_bundle_plain::ext_oneapi_get_raw_kernel_name(detail::string_view name) {
return detail::string{impl->ext_oneapi_get_raw_kernel_name(name.data())};
}

bool kernel_bundle_plain::ext_oneapi_has_device_global(
detail::string_view name) {
return impl->ext_oneapi_has_device_global(name.data());
}

void *kernel_bundle_plain::ext_oneapi_get_device_global_address(
detail::string_view name, const device &dev) {
return impl->ext_oneapi_get_device_global_address(name.data(), dev);
}

size_t kernel_bundle_plain::ext_oneapi_get_device_global_size(
detail::string_view name) {
return impl->ext_oneapi_get_device_global_size(name.data());
}

//////////////////////////////////
///// sycl::detail free functions
//////////////////////////////////
Expand Down
Loading