25
25
#include < cstdint>
26
26
#include < cstring>
27
27
#include < memory>
28
+ #include < unordered_set>
28
29
#include < vector>
29
30
30
31
#include " split_string.hpp"
@@ -383,6 +384,8 @@ class kernel_bundle_impl {
383
384
const std::vector<kernel_id> &KernelIDs,
384
385
std::vector<std::string> &&KernelNames,
385
386
std::unordered_map<std::string, std::string> &&MangledKernelNames,
387
+ std::vector<std::string> &&DeviceGlobalNames,
388
+ std::vector<std::unique_ptr<std::byte[]>> &&DeviceGlobalAllocations,
386
389
sycl_device_binaries Binaries, std::string &&Prefix,
387
390
syclex::source_language Lang)
388
391
: kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs,
@@ -396,6 +399,8 @@ class kernel_bundle_impl {
396
399
MIsInterop = true ;
397
400
MKernelNames = std::move (KernelNames);
398
401
MMangledKernelNames = std::move (MangledKernelNames);
402
+ MDeviceGlobalNames = std::move (DeviceGlobalNames);
403
+ MDeviceGlobalAllocations = std::move (DeviceGlobalAllocations);
399
404
MDeviceBinaries = Binaries;
400
405
MPrefix = std::move (Prefix);
401
406
MLanguage = Lang;
@@ -546,6 +551,12 @@ class kernel_bundle_impl {
546
551
std::vector<kernel_id> KernelIDs;
547
552
std::vector<std::string> KernelNames;
548
553
std::unordered_map<std::string, std::string> MangledKernelNames;
554
+
555
+ std::unordered_set<std::string> DeviceGlobalIDSet;
556
+ std::vector<std::string> DeviceGlobalIDVec;
557
+ std::vector<std::string> DeviceGlobalNames;
558
+ std::vector<std::unique_ptr<std::byte[]>> DeviceGlobalAllocations;
559
+
549
560
for (const auto &KernelID : PM.getAllSYCLKernelIDs ()) {
550
561
std::string_view KernelName{KernelID.get_name ()};
551
562
if (KernelName.find (Prefix) == 0 ) {
@@ -563,8 +574,8 @@ class kernel_bundle_impl {
563
574
}
564
575
}
565
576
566
- // Apply frontend information.
567
577
for (const auto *RawImg : PM.getRawDeviceImages (KernelIDs)) {
578
+ // Mangled names.
568
579
for (const sycl_device_binary_property &RKProp :
569
580
RawImg->getRegisteredKernels ()) {
570
581
@@ -574,11 +585,49 @@ class kernel_bundle_impl {
574
585
reinterpret_cast <const char *>(BA.begin ()), MangledNameLen};
575
586
MangledKernelNames.emplace (RKProp->Name , MangledName);
576
587
}
588
+
589
+ // Device globals.
590
+ for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals ()) {
591
+ std::string_view DeviceGlobalName{DeviceGlobalProp->Name };
592
+ assert (DeviceGlobalName.find (Prefix) == 0 );
593
+ bool Inserted = false ;
594
+ std::tie (std::ignore, Inserted) =
595
+ DeviceGlobalIDSet.emplace (DeviceGlobalName);
596
+ if (Inserted) {
597
+ DeviceGlobalIDVec.emplace_back (DeviceGlobalName);
598
+ DeviceGlobalName.remove_prefix (Prefix.length ());
599
+ DeviceGlobalNames.emplace_back (DeviceGlobalName);
600
+ }
601
+ }
602
+ }
603
+
604
+ // Device globals are usually statically allocated and registered in the
605
+ // integration footer, which we don't have in the RTC context. Instead, we
606
+ // dynamically allocate storage tied to the executable kernel bundle.
607
+ for (DeviceGlobalMapEntry *DeviceGlobalEntry :
608
+ PM.getDeviceGlobalEntries (DeviceGlobalIDVec)) {
609
+
610
+ size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize ; // init value
611
+ if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated ) {
612
+ // Consider storage for device USM pointer.
613
+ AllocSize += sizeof (void *);
614
+ }
615
+ auto Alloc = std::make_unique<std::byte[]>(AllocSize);
616
+ std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId };
617
+ PM.addOrInitDeviceGlobalEntry (Alloc.get (), DeviceGlobalName.data ());
618
+ DeviceGlobalAllocations.push_back (std::move (Alloc));
619
+
620
+ // Drop the RTC prefix from the entry's symbol name. Note that the PM
621
+ // still manages this device global under its prefixed name.
622
+ assert (DeviceGlobalName.find (Prefix) == 0 );
623
+ DeviceGlobalName.remove_prefix (Prefix.length ());
624
+ DeviceGlobalEntry->MUniqueId = DeviceGlobalName;
577
625
}
578
626
579
627
return std::make_shared<kernel_bundle_impl>(
580
628
MContext, MDevices, KernelIDs, std::move (KernelNames),
581
- std::move (MangledKernelNames), Binaries, std::move (Prefix),
629
+ std::move (MangledKernelNames), std::move (DeviceGlobalNames),
630
+ std::move (DeviceGlobalAllocations), Binaries, std::move (Prefix),
582
631
MLanguage);
583
632
}
584
633
@@ -680,6 +729,8 @@ class kernel_bundle_impl {
680
729
KernelNames, MLanguage);
681
730
}
682
731
732
+ // Utility methods for kernel_compiler functionality
733
+ private:
683
734
std::string adjust_kernel_name (const std::string &Name) {
684
735
if (MLanguage == syclex::source_language::sycl) {
685
736
auto It = MMangledKernelNames.find (Name);
@@ -694,8 +745,58 @@ class kernel_bundle_impl {
694
745
MKernelNames.end ();
695
746
}
696
747
748
/// Builds the Itanium-style mangled symbol for a device global declared at
/// global scope: the `_Z` prefix followed by the length-prefixed identifier,
/// e.g. `foo` becomes `_Z3foo`.
///
/// \param Name unqualified source-level name of the device global.
/// \return the mangled name; callers prepend the bundle prefix themselves
///         where the prefixed form is needed.
std::string mangle_device_global_name(const std::string &Name) {
  // TODO: Support device globals declared in namespaces.
  // The prefix has to be exactly "_Z" with no surrounding whitespace: the
  // result is compared verbatim against the recorded device global names.
  return "_Z" + std::to_string(Name.length()) + Name;
}
752
+
753
+ DeviceGlobalMapEntry *get_device_global_entry (const std::string &Name) {
754
+ if (MKernelNames.empty () || MLanguage != syclex::source_language::sycl) {
755
+ throw sycl::exception (make_error_code (errc::invalid),
756
+ " Querying device globals by name is only available "
757
+ " in kernel_bundles successfully built from "
758
+ " kernel_bundle<bundle_state>::ext_oneapi_source> "
759
+ " with 'sycl' source language." );
760
+ }
761
+
762
+ if (!ext_oneapi_has_device_global (Name)) {
763
+ throw sycl::exception (make_error_code (errc::invalid),
764
+ " device global '" + Name +
765
+ " ' not found in kernel_bundle" );
766
+ }
767
+
768
+ std::vector<DeviceGlobalMapEntry *> Entries =
769
+ ProgramManager::getInstance ().getDeviceGlobalEntries (
770
+ {MPrefix + mangle_device_global_name (Name)});
771
+ assert (Entries.size () == 1 );
772
+ return Entries.front ();
773
+ }
774
+
775
+ void unregister_device_globals_from_context () {
776
+ if (MDeviceGlobalNames.empty ())
777
+ return ;
778
+
779
+ // Manually trigger the release of resources for all device global map
780
+ // entries associated with this runtime-compiled bundle. Normally, this
781
+ // would happen in `~context_impl()`, however in the RTC setting, the
782
+ // context outlives the DG map entries owned by the program manager.
783
+
784
+ std::vector<std::string> DeviceGlobalIDs;
785
+ std::transform (MDeviceGlobalNames.begin (), MDeviceGlobalNames.end (),
786
+ std::back_inserter (DeviceGlobalIDs),
787
+ [&](const std::string &DGName) { return MPrefix + DGName; });
788
+ auto ContextImpl = getSyclObjImpl (MContext);
789
+ for (DeviceGlobalMapEntry *Entry :
790
+ ProgramManager::getInstance ().getDeviceGlobalEntries (
791
+ DeviceGlobalIDs)) {
792
+ Entry->removeAssociatedResources (ContextImpl.get ());
793
+ ContextImpl->removeAssociatedDeviceGlobal (Entry->MDeviceGlobalPtr );
794
+ }
795
+ }
796
+
797
+ public:
697
798
bool ext_oneapi_has_kernel (const std::string &Name) {
698
- return is_kernel_name (adjust_kernel_name (Name));
799
+ return !MKernelNames. empty () && is_kernel_name (adjust_kernel_name (Name));
699
800
}
700
801
701
802
kernel
@@ -768,6 +869,41 @@ class kernel_bundle_impl {
768
869
return AdjustedName;
769
870
}
770
871
872
+ bool ext_oneapi_has_device_global (const std::string &Name) {
873
+ return !MDeviceGlobalNames.empty () &&
874
+ std::find (MDeviceGlobalNames.begin (), MDeviceGlobalNames.end (),
875
+ mangle_device_global_name (Name)) !=
876
+ MDeviceGlobalNames.end ();
877
+ }
878
+
879
+ void *ext_oneapi_get_device_global_address (const std::string &Name,
880
+ const device &Dev) {
881
+ DeviceGlobalMapEntry *Entry = get_device_global_entry (Name);
882
+
883
+ if (std::find (MDevices.begin (), MDevices.end (), Dev) == MDevices.end ()) {
884
+ throw sycl::exception (make_error_code (errc::invalid),
885
+ " kernel_bundle not built for device" );
886
+ }
887
+
888
+ if (Entry->MIsDeviceImageScopeDecorated ) {
889
+ throw sycl::exception (make_error_code (errc::invalid),
890
+ " Cannot query USM pointer for device global with "
891
+ " 'device_image_scope' property" );
892
+ }
893
+
894
+ // TODO: Add context-only initialization via `urUSMContextMemcpyExp` instead
895
+ // of using a throw-away queue.
896
+ queue InitQueue{MContext, Dev};
897
+ auto &USMMem =
898
+ Entry->getOrAllocateDeviceGlobalUSM (getSyclObjImpl (InitQueue));
899
+ InitQueue.wait_and_throw ();
900
+ return USMMem.getPtr ();
901
+ }
902
+
903
+ size_t ext_oneapi_get_device_global_size (const std::string &Name) {
904
+ return get_device_global_entry (Name)->MDeviceGlobalTSize ;
905
+ }
906
+
771
907
/// Reports whether this bundle holds no device images.
bool empty() const noexcept {
  const bool NoImages = MDeviceImages.empty();
  return NoImages;
}
772
908
773
909
backend get_backend () const noexcept {
@@ -999,6 +1135,7 @@ class kernel_bundle_impl {
999
1135
~kernel_bundle_impl () {
1000
1136
try {
1001
1137
if (MDeviceBinaries) {
1138
+ unregister_device_globals_from_context ();
1002
1139
ProgramManager::getInstance ().removeImages (MDeviceBinaries);
1003
1140
syclex::detail::SYCL_JIT_Destroy (MDeviceBinaries);
1004
1141
}
@@ -1039,6 +1176,8 @@ class kernel_bundle_impl {
1039
1176
// only kernel_bundles created from source have KernelNames member.
1040
1177
std::vector<std::string> MKernelNames;
1041
1178
std::unordered_map<std::string, std::string> MMangledKernelNames;
1179
+ std::vector<std::string> MDeviceGlobalNames;
1180
+ std::vector<std::unique_ptr<std::byte[]>> MDeviceGlobalAllocations;
1042
1181
sycl_device_binaries MDeviceBinaries = nullptr ;
1043
1182
std::string MPrefix;
1044
1183
include_pairs_t MIncludePairs;
0 commit comments