Skip to content

Commit 09ad632

Browse files
authored
[SYCL][RTC] Initial support for device globals (#16565)
Adds limited support for device globals in runtime-compiled SYCL code. The application interacts with the globals via three new methods on `kernel_bundle`: ```c++ bool ext_oneapi_has_device_global(const std::string &name); void *ext_oneapi_get_device_global_address(const std::string &name, const device &dev); // return a USM pointer suitable for queue::memcpy etc. size_t ext_oneapi_get_device_global_size(const std::string &name); ``` This PR uses the same trick as #16316, i.e. prepending a kernel-bundle-specific prefix to the names of device globals to make them distinguishable for the program manager. Limitations: - Device globals inside a namespace are unsupported due to insufficient name mangling. - Device globals with the `device_image_scope` property cannot be read/written from the host, because the runtime currently cannot expose USM pointers for them. A workaround is using explicit kernels to read/write the global's value into a USM buffer. --------- Signed-off-by: Julian Oppermann <[email protected]>
1 parent 77e110d commit 09ad632

File tree

11 files changed

+415
-8
lines changed

11 files changed

+415
-8
lines changed

sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,9 @@ jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
692692
/*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints);
693693
assert(Splitter->hasMoreSplits());
694694

695-
// TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall
696-
// be processed.
695+
if (auto Err = Splitter->verifyNoCrossModuleDeviceGlobalUsage()) {
696+
return std::move(Err);
697+
}
697698

698699
SmallVector<RTCDevImgInfo> DevImgInfoVec;
699700
SmallVector<std::unique_ptr<llvm::Module>> Modules;
@@ -734,7 +735,7 @@ jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
734735
GlobalBinImageProps PropReq{
735736
/*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true,
736737
/*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true,
737-
/*DeviceGlobals=*/false};
738+
/*DeviceGlobals=*/true};
738739
PropertySetRegistry Properties =
739740
computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq);
740741

sycl/include/sycl/kernel_bundle.hpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,19 @@ class __SYCL_EXPORT kernel_bundle_plain {
242242
ext_oneapi_get_raw_kernel_name(detail::string_view{name}).c_str()};
243243
}
244244

245+
bool ext_oneapi_has_device_global(const std::string &name) {
246+
return ext_oneapi_has_device_global(detail::string_view{name});
247+
}
248+
249+
void *ext_oneapi_get_device_global_address(const std::string &name,
250+
const device &dev) {
251+
return ext_oneapi_get_device_global_address(detail::string_view{name}, dev);
252+
}
253+
254+
size_t ext_oneapi_get_device_global_size(const std::string &name) {
255+
return ext_oneapi_get_device_global_size(detail::string_view{name});
256+
}
257+
245258
protected:
246259
// \returns a kernel object which represents the kernel identified by
247260
// kernel_id passed
@@ -271,6 +284,11 @@ class __SYCL_EXPORT kernel_bundle_plain {
271284
bool ext_oneapi_has_kernel(detail::string_view name);
272285
kernel ext_oneapi_get_kernel(detail::string_view name);
273286
detail::string ext_oneapi_get_raw_kernel_name(detail::string_view name);
287+
288+
bool ext_oneapi_has_device_global(detail::string_view name);
289+
void *ext_oneapi_get_device_global_address(detail::string_view name,
290+
const device &dev);
291+
size_t ext_oneapi_get_device_global_size(detail::string_view name);
274292
};
275293

276294
} // namespace detail
@@ -501,6 +519,43 @@ class kernel_bundle : public detail::kernel_bundle_plain,
501519
return detail::kernel_bundle_plain::ext_oneapi_get_raw_kernel_name(name);
502520
}
503521

522+
/////////////////////////
523+
// ext_oneapi_has_device_global
524+
// only true if kernel_bundle was created from source and has this device
525+
// global
526+
/////////////////////////
527+
template <bundle_state _State = State,
528+
typename = std::enable_if_t<_State == bundle_state::executable>>
529+
bool ext_oneapi_has_device_global(const std::string &name) {
530+
return detail::kernel_bundle_plain::ext_oneapi_has_device_global(name);
531+
}
532+
533+
/////////////////////////
534+
// ext_oneapi_get_device_global_address
535+
// kernel_bundle must be created from source, throws if bundle was not built
536+
// for this device, or device global is either not present or has
537+
// `device_image_scope` property.
538+
// Returns a USM pointer to the variable's initialized storage on the device.
539+
/////////////////////////
540+
template <bundle_state _State = State,
541+
typename = std::enable_if_t<_State == bundle_state::executable>>
542+
void *ext_oneapi_get_device_global_address(const std::string &name,
543+
const device &dev) {
544+
return detail::kernel_bundle_plain::ext_oneapi_get_device_global_address(
545+
name, dev);
546+
}
547+
548+
/////////////////////////
549+
// ext_oneapi_get_device_global_size
550+
// kernel_bundle must be created from source, throws if device global is not
551+
// present. Returns the variable's size in bytes.
552+
/////////////////////////
553+
template <bundle_state _State = State,
554+
typename = std::enable_if_t<_State == bundle_state::executable>>
555+
size_t ext_oneapi_get_device_global_size(const std::string &name) {
556+
return detail::kernel_bundle_plain::ext_oneapi_get_device_global_size(name);
557+
}
558+
504559
private:
505560
kernel_bundle(detail::KernelBundleImplPtr Impl)
506561
: kernel_bundle_plain(std::move(Impl)) {}

sycl/source/detail/context_impl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,11 @@ void context_impl::addAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
329329
MAssociatedDeviceGlobals.insert(DeviceGlobalPtr);
330330
}
331331

332+
void context_impl::removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr) {
333+
std::lock_guard<std::mutex> Lock{MAssociatedDeviceGlobalsMutex};
334+
MAssociatedDeviceGlobals.erase(DeviceGlobalPtr);
335+
}
336+
332337
void context_impl::addDeviceGlobalInitializer(
333338
ur_program_handle_t Program, const std::vector<device> &Devs,
334339
const RTDeviceBinaryImage *BinImage) {
@@ -407,6 +412,9 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
407412
// Device global map entry pointers will not die before the end of the
408413
// program and the pointers will stay the same, so we do not need
409414
// m_DeviceGlobalsMutex here.
415+
// The lifetimes of device global map entries representing globals in
416+
// runtime-compiled code will be tied to the kernel bundle, so the
417+
// assumption holds in that setting as well.
410418
for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) {
411419
// Get or allocate the USM memory associated with the device global.
412420
DeviceGlobalUSMMem &DeviceGlobalUSM =

sycl/source/detail/context_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ class context_impl {
203203
/// Adds an associated device global to the tracked associates.
204204
void addAssociatedDeviceGlobal(const void *DeviceGlobalPtr);
205205

206+
/// Removes an associated device global from the tracked associates.
207+
void removeAssociatedDeviceGlobal(const void *DeviceGlobalPtr);
208+
206209
/// Adds a device global initializer.
207210
void addDeviceGlobalInitializer(ur_program_handle_t Program,
208211
const std::vector<device> &Devs,

sycl/source/detail/jit_compiler.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1170,14 +1170,18 @@ sycl_device_binaries jit_compiler::createDeviceBinaries(
11701170
}
11711171

11721172
for (const auto &FPS : DevImgInfo.Properties) {
1173+
bool IsDeviceGlobalsPropSet =
1174+
FPS.Name == __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS;
11731175
PropertySetContainer PropSet{FPS.Name.c_str()};
11741176
for (const auto &FPV : FPS.Values) {
11751177
if (FPV.IsUIntValue) {
11761178
PropSet.addProperty(
11771179
PropertyContainer{FPV.Name.c_str(), FPV.UIntValue});
11781180
} else {
1181+
std::string PrefixedName =
1182+
(IsDeviceGlobalsPropSet ? Prefix : "") + FPV.Name.c_str();
11791183
PropSet.addProperty(PropertyContainer{
1180-
FPV.Name.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(),
1184+
PrefixedName.c_str(), FPV.Bytes.begin(), FPV.Bytes.size(),
11811185
sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY});
11821186
}
11831187
}

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 142 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <cstdint>
2626
#include <cstring>
2727
#include <memory>
28+
#include <unordered_set>
2829
#include <vector>
2930

3031
#include "split_string.hpp"
@@ -383,6 +384,8 @@ class kernel_bundle_impl {
383384
const std::vector<kernel_id> &KernelIDs,
384385
std::vector<std::string> &&KernelNames,
385386
std::unordered_map<std::string, std::string> &&MangledKernelNames,
387+
std::vector<std::string> &&DeviceGlobalNames,
388+
std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
386389
sycl_device_binaries Binaries, std::string &&Prefix,
387390
syclex::source_language Lang)
388391
: kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
@@ -396,6 +399,8 @@ class kernel_bundle_impl {
396399
MIsInterop = true;
397400
MKernelNames = std::move(KernelNames);
398401
MMangledKernelNames = std::move(MangledKernelNames);
402+
MDeviceGlobalNames = std::move(DeviceGlobalNames);
403+
MDeviceGlobalAllocations = std::move(DeviceGlobalAllocations);
399404
MDeviceBinaries = Binaries;
400405
MPrefix = std::move(Prefix);
401406
MLanguage = Lang;
@@ -546,6 +551,12 @@ class kernel_bundle_impl {
546551
std::vector<kernel_id> KernelIDs;
547552
std::vector<std::string> KernelNames;
548553
std::unordered_map<std::string, std::string> MangledKernelNames;
554+
555+
std::unordered_set<std::string> DeviceGlobalIDSet;
556+
std::vector<std::string> DeviceGlobalIDVec;
557+
std::vector<std::string> DeviceGlobalNames;
558+
std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
559+
549560
for (const auto &KernelID : PM.getAllSYCLKernelIDs()) {
550561
std::string_view KernelName{KernelID.get_name()};
551562
if (KernelName.find(Prefix) == 0) {
@@ -563,8 +574,8 @@ class kernel_bundle_impl {
563574
}
564575
}
565576

566-
// Apply frontend information.
567577
for (const auto *RawImg : PM.getRawDeviceImages(KernelIDs)) {
578+
// Mangled names.
568579
for (const sycl_device_binary_property &RKProp :
569580
RawImg->getRegisteredKernels()) {
570581

@@ -574,11 +585,49 @@ class kernel_bundle_impl {
574585
reinterpret_cast<const char *>(BA.begin()), MangledNameLen};
575586
MangledKernelNames.emplace(RKProp->Name, MangledName);
576587
}
588+
589+
// Device globals.
590+
for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) {
591+
std::string_view DeviceGlobalName{DeviceGlobalProp->Name};
592+
assert(DeviceGlobalName.find(Prefix) == 0);
593+
bool Inserted = false;
594+
std::tie(std::ignore, Inserted) =
595+
DeviceGlobalIDSet.emplace(DeviceGlobalName);
596+
if (Inserted) {
597+
DeviceGlobalIDVec.emplace_back(DeviceGlobalName);
598+
DeviceGlobalName.remove_prefix(Prefix.length());
599+
DeviceGlobalNames.emplace_back(DeviceGlobalName);
600+
}
601+
}
602+
}
603+
604+
// Device globals are usually statically allocated and registered in the
605+
// integration footer, which we don't have in the RTC context. Instead, we
606+
// dynamically allocate storage tied to the executable kernel bundle.
607+
for (DeviceGlobalMapEntry *DeviceGlobalEntry :
608+
PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) {
609+
610+
size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value
611+
if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) {
612+
// Consider storage for device USM pointer.
613+
AllocSize += sizeof(void *);
614+
}
615+
auto Alloc = std::make_unique<std::byte[]>(AllocSize);
616+
std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId};
617+
PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data());
618+
DeviceGlobalAllocations.push_back(std::move(Alloc));
619+
620+
// Drop the RTC prefix from the entry's symbol name. Note that the PM
621+
// still manages this device global under its prefixed name.
622+
assert(DeviceGlobalName.find(Prefix) == 0);
623+
DeviceGlobalName.remove_prefix(Prefix.length());
624+
DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
577625
}
578626

579627
return std::make_shared<kernel_bundle_impl>(
580628
MContext, MDevices, KernelIDs, std::move(KernelNames),
581-
std::move(MangledKernelNames), Binaries, std::move(Prefix),
629+
std::move(MangledKernelNames), std::move(DeviceGlobalNames),
630+
std::move(DeviceGlobalAllocations), Binaries, std::move(Prefix),
582631
MLanguage);
583632
}
584633

@@ -680,6 +729,8 @@ class kernel_bundle_impl {
680729
KernelNames, MLanguage);
681730
}
682731

732+
// Utility methods for kernel_compiler functionality
733+
private:
683734
std::string adjust_kernel_name(const std::string &Name) {
684735
if (MLanguage == syclex::source_language::sycl) {
685736
auto It = MMangledKernelNames.find(Name);
@@ -694,8 +745,58 @@ class kernel_bundle_impl {
694745
MKernelNames.end();
695746
}
696747

748+
std::string mangle_device_global_name(const std::string &Name) {
749+
// TODO: Support device globals declared in namespaces.
750+
return "_Z" + std::to_string(Name.length()) + Name;
751+
}
752+
753+
DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name) {
754+
if (MKernelNames.empty() || MLanguage != syclex::source_language::sycl) {
755+
throw sycl::exception(make_error_code(errc::invalid),
756+
"Querying device globals by name is only available "
757+
"in kernel_bundles successfully built from "
758+
"kernel_bundle<bundle_state>::ext_oneapi_source> "
759+
"with 'sycl' source language.");
760+
}
761+
762+
if (!ext_oneapi_has_device_global(Name)) {
763+
throw sycl::exception(make_error_code(errc::invalid),
764+
"device global '" + Name +
765+
"' not found in kernel_bundle");
766+
}
767+
768+
std::vector<DeviceGlobalMapEntry *> Entries =
769+
ProgramManager::getInstance().getDeviceGlobalEntries(
770+
{MPrefix + mangle_device_global_name(Name)});
771+
assert(Entries.size() == 1);
772+
return Entries.front();
773+
}
774+
775+
void unregister_device_globals_from_context() {
776+
if (MDeviceGlobalNames.empty())
777+
return;
778+
779+
// Manually trigger the release of resources for all device global map
780+
// entries associated with this runtime-compiled bundle. Normally, this
781+
// would happen in `~context_impl()`, however in the RTC setting, the
782+
// context outlives the DG map entries owned by the program manager.
783+
784+
std::vector<std::string> DeviceGlobalIDs;
785+
std::transform(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(),
786+
std::back_inserter(DeviceGlobalIDs),
787+
[&](const std::string &DGName) { return MPrefix + DGName; });
788+
auto ContextImpl = getSyclObjImpl(MContext);
789+
for (DeviceGlobalMapEntry *Entry :
790+
ProgramManager::getInstance().getDeviceGlobalEntries(
791+
DeviceGlobalIDs)) {
792+
Entry->removeAssociatedResources(ContextImpl.get());
793+
ContextImpl->removeAssociatedDeviceGlobal(Entry->MDeviceGlobalPtr);
794+
}
795+
}
796+
797+
public:
697798
bool ext_oneapi_has_kernel(const std::string &Name) {
698-
return is_kernel_name(adjust_kernel_name(Name));
799+
return !MKernelNames.empty() && is_kernel_name(adjust_kernel_name(Name));
699800
}
700801

701802
kernel
@@ -768,6 +869,41 @@ class kernel_bundle_impl {
768869
return AdjustedName;
769870
}
770871

872+
bool ext_oneapi_has_device_global(const std::string &Name) {
873+
return !MDeviceGlobalNames.empty() &&
874+
std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(),
875+
mangle_device_global_name(Name)) !=
876+
MDeviceGlobalNames.end();
877+
}
878+
879+
void *ext_oneapi_get_device_global_address(const std::string &Name,
880+
const device &Dev) {
881+
DeviceGlobalMapEntry *Entry = get_device_global_entry(Name);
882+
883+
if (std::find(MDevices.begin(), MDevices.end(), Dev) == MDevices.end()) {
884+
throw sycl::exception(make_error_code(errc::invalid),
885+
"kernel_bundle not built for device");
886+
}
887+
888+
if (Entry->MIsDeviceImageScopeDecorated) {
889+
throw sycl::exception(make_error_code(errc::invalid),
890+
"Cannot query USM pointer for device global with "
891+
"'device_image_scope' property");
892+
}
893+
894+
// TODO: Add context-only initialization via `urUSMContextMemcpyExp` instead
895+
// of using a throw-away queue.
896+
queue InitQueue{MContext, Dev};
897+
auto &USMMem =
898+
Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue));
899+
InitQueue.wait_and_throw();
900+
return USMMem.getPtr();
901+
}
902+
903+
size_t ext_oneapi_get_device_global_size(const std::string &Name) {
904+
return get_device_global_entry(Name)->MDeviceGlobalTSize;
905+
}
906+
771907
bool empty() const noexcept { return MDeviceImages.empty(); }
772908

773909
backend get_backend() const noexcept {
@@ -999,6 +1135,7 @@ class kernel_bundle_impl {
9991135
~kernel_bundle_impl() {
10001136
try {
10011137
if (MDeviceBinaries) {
1138+
unregister_device_globals_from_context();
10021139
ProgramManager::getInstance().removeImages(MDeviceBinaries);
10031140
syclex::detail::SYCL_JIT_Destroy(MDeviceBinaries);
10041141
}
@@ -1039,6 +1176,8 @@ class kernel_bundle_impl {
10391176
// only kernel_bundles created from source have KernelNames member.
10401177
std::vector<std::string> MKernelNames;
10411178
std::unordered_map<std::string, std::string> MMangledKernelNames;
1179+
std::vector<std::string> MDeviceGlobalNames;
1180+
std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
10421181
sycl_device_binaries MDeviceBinaries = nullptr;
10431182
std::string MPrefix;
10441183
include_pairs_t MIncludePairs;

sycl/source/kernel_bundle.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,21 @@ kernel_bundle_plain::ext_oneapi_get_raw_kernel_name(detail::string_view name) {
142142
return detail::string{impl->ext_oneapi_get_raw_kernel_name(name.data())};
143143
}
144144

145+
bool kernel_bundle_plain::ext_oneapi_has_device_global(
146+
detail::string_view name) {
147+
return impl->ext_oneapi_has_device_global(name.data());
148+
}
149+
150+
void *kernel_bundle_plain::ext_oneapi_get_device_global_address(
151+
detail::string_view name, const device &dev) {
152+
return impl->ext_oneapi_get_device_global_address(name.data(), dev);
153+
}
154+
155+
size_t kernel_bundle_plain::ext_oneapi_get_device_global_size(
156+
detail::string_view name) {
157+
return impl->ext_oneapi_get_device_global_size(name.data());
158+
}
159+
145160
//////////////////////////////////
146161
///// sycl::detail free functions
147162
//////////////////////////////////

0 commit comments

Comments
 (0)