Skip to content

Commit 2af29af

Browse files
committed
[SYCL][ESIMD] Support ESIMD extension in the SYCL runtime.
1. Handle 1d accessors differently - wrap in image1d buffer object to work with ESIMD back-end. 2. Introduce new OpenCL image1d buffer type in device code - to represent accessor's wrapped memory object. 3. Add "private proxy" classes to access accessor's memory object in the simd library. Co-authored-by: Vlad Romanov <[email protected]> Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 03ef819 commit 2af29af

15 files changed

+190
-14
lines changed

sycl/include/CL/__spirv/spirv_vars.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
#define __SPIRV_VAR_QUALIFIERS extern "C" const
1717

18-
#ifdef __SYCL_NVPTX__
18+
#if defined(__SYCL_NVPTX__) || defined(__SYCL_EXPLICIT_SIMD__)
1919

2020
SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_x();
2121
SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_y();

sycl/include/CL/sycl/accessor.hpp

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,17 @@
195195
/// accessor_common contains several helpers common for both accessor(1) and
196196
/// accessor(3)
197197

198+
__SYCL_INLINE_NAMESPACE(cl) {
199+
namespace sycl {
200+
namespace intel {
201+
namespace gpu {
202+
// Forward declare a "back-door" access class to support ESIMD.
203+
class AccessorPrivateProxy;
204+
} // namespace gpu
205+
} // namespace intel
206+
} // namespace sycl
207+
} // __SYCL_INLINE_NAMESPACE(cl)
208+
198209
__SYCL_INLINE_NAMESPACE(cl) {
199210
namespace sycl {
200211

@@ -419,6 +430,13 @@ class image_accessor
419430

420431
#endif
421432

433+
private:
434+
friend class sycl::intel::gpu::AccessorPrivateProxy;
435+
436+
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_EXPLICIT_SIMD__)
437+
const OCLImageTy getNativeImageObj() const { return MImageObj; }
438+
#endif // __SYCL_DEVICE_ONLY__ && __SYCL_EXPLICIT_SIMD__
439+
422440
public:
423441
using value_type = DataT;
424442
using reference = DataT &;
@@ -805,8 +823,27 @@ class accessor :
805823

806824
detail::AccessorImplDevice<AdjustedDim> impl;
807825

808-
ConcreteASPtrType MData;
826+
#ifdef __SYCL_EXPLICIT_SIMD__
827+
using OCLImage1dBufferTy =
828+
typename detail::opencl_image1d_buffer_type<AccessMode>::type;
829+
#endif // __SYCL_EXPLICIT_SIMD__
830+
831+
union {
832+
ConcreteASPtrType MData;
833+
#ifdef __SYCL_EXPLICIT_SIMD__
834+
OCLImage1dBufferTy ImageBuffer;
835+
#endif // __SYCL_EXPLICIT_SIMD__
836+
};
837+
838+
#ifdef __SYCL_EXPLICIT_SIMD__
839+
// TODO In ESIMD accessors usage is limited for now - access range, mem
840+
// range and offset are not supported. The cl_mem object allocated for
841+
// a global accessor is always wrapped into a 1d image buffer to enable
842+
// surface index-based addressing.
843+
void __init(OCLImage1dBufferTy ImgBuf) { ImageBuffer = ImgBuf; }
809844

845+
const OCLImage1dBufferTy getNativeImageObj() const { return ImageBuffer; }
846+
#else
810847
void __init(ConcreteASPtrType Ptr, range<AdjustedDim> AccessRange,
811848
range<AdjustedDim> MemRange, id<AdjustedDim> Offset) {
812849
MData = Ptr;
@@ -820,7 +857,7 @@ class accessor :
820857
if (1 == AdjustedDim)
821858
MData += Offset[0];
822859
}
823-
860+
#endif // __SYCL_EXPLICIT_SIMD__
824861
ConcreteASPtrType getQualifiedPtr() const { return MData; }
825862

826863
public:
@@ -843,6 +880,9 @@ class accessor :
843880

844881
#endif // __SYCL_DEVICE_ONLY__
845882

883+
private:
884+
friend class sycl::intel::gpu::AccessorPrivateProxy;
885+
846886
public:
847887
using value_type = DataT;
848888
using reference = DataT &;

sycl/include/CL/sycl/detail/accessor_impl.hpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@
1515
#include <CL/sycl/range.hpp>
1616
#include <CL/sycl/stl.hpp>
1717

18+
__SYCL_INLINE_NAMESPACE(cl) {
19+
namespace sycl {
20+
namespace intel {
21+
namespace gpu {
22+
// Forward declare a "back-door" access class to support ESIMD.
23+
class AccessorPrivateProxy;
24+
} // namespace gpu
25+
} // namespace intel
26+
} // namespace sycl
27+
} // __SYCL_INLINE_NAMESPACE(cl)
28+
1829
__SYCL_INLINE_NAMESPACE(cl) {
1930
namespace sycl {
2031
namespace detail {
@@ -59,16 +70,29 @@ template <int Dims> class LocalAccessorBaseDevice {
5970
}
6071
};
6172

73+
// TODO ESIMD Currently all accessors are treated as ESIMD under corresponding
74+
// compiler option enabling the macro below. Eventually ESIMD kernels and usual
75+
// kernels must co-exist and there must be a mechanism for distinguishing usual
76+
// and ESIMD accessors.
77+
#ifndef __SYCL_EXPLICIT_SIMD__
78+
constexpr bool IsESIMDAccInit = false;
79+
#else
80+
constexpr bool IsESIMDAccInit = true;
81+
#endif // __SYCL_EXPLICIT_SIMD__
82+
6283
class __SYCL_EXPORT AccessorImplHost {
6384
public:
6485
AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange,
6586
access::mode AccessMode, detail::SYCLMemObjI *SYCLMemObject,
6687
int Dims, int ElemSize, int OffsetInBytes = 0,
67-
bool IsSubBuffer = false)
88+
bool IsSubBuffer = false, bool IsESIMDAcc = IsESIMDAccInit)
6889
: MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange),
6990
MAccessMode(AccessMode), MSYCLMemObj(SYCLMemObject), MDims(Dims),
7091
MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes),
71-
MIsSubBuffer(IsSubBuffer) {}
92+
MIsSubBuffer(IsSubBuffer) {
93+
MIsESIMDAcc =
94+
IsESIMDAcc && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER);
95+
}
7296

7397
~AccessorImplHost();
7498

@@ -77,7 +101,7 @@ class __SYCL_EXPORT AccessorImplHost {
77101
MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode),
78102
MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims),
79103
MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes),
80-
MIsSubBuffer(Other.MIsSubBuffer) {}
104+
MIsSubBuffer(Other.MIsSubBuffer), MIsESIMDAcc(Other.MIsESIMDAcc) {}
81105

82106
// The resize method provides a way to change the size of the
83107
// allocated memory and corresponding properties for the accessor.
@@ -109,6 +133,9 @@ class __SYCL_EXPORT AccessorImplHost {
109133
Command *MBlockedCmd = nullptr;
110134

111135
bool PerWI = false;
136+
137+
// Whether this accessor is ESIMD accessor with special memory allocation.
138+
bool MIsESIMDAcc;
112139
};
113140

114141
using AccessorImplPtr = shared_ptr_class<AccessorImplHost>;
@@ -121,7 +148,8 @@ class AccessorBaseHost {
121148
bool IsSubBuffer = false) {
122149
impl = shared_ptr_class<AccessorImplHost>(new AccessorImplHost(
123150
Offset, AccessRange, MemoryRange, AccessMode, SYCLMemObject, Dims,
124-
ElemSize, OffsetInBytes, IsSubBuffer));
151+
ElemSize, OffsetInBytes, IsSubBuffer,
152+
IsESIMDAccInit && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER)));
125153
}
126154

127155
protected:
@@ -140,6 +168,9 @@ class AccessorBaseHost {
140168
friend decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject);
141169

142170
AccessorImplPtr impl;
171+
172+
private:
173+
friend class sycl::intel::gpu::AccessorPrivateProxy;
143174
};
144175

145176
class __SYCL_EXPORT LocalAccessorImplHost {

sycl/include/CL/sycl/detail/image_ocl_types.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,30 @@ inline int getSPIRVElementSize(int ImageChannelType, int ImageChannelOrder) {
180180
}
181181
}
182182

183+
#ifdef __SYCL_EXPLICIT_SIMD__
184+
template <access::mode AccessMode> struct opencl_image1d_buffer_type;
185+
186+
// OpenCL types used only when compiling DPCPP ESIMD kernels
187+
#define IMAGE_BUFFER_TY_DEFINE(AccessMode, AMSuffix) \
188+
template <> struct opencl_image1d_buffer_type<access::mode::AccessMode> { \
189+
using type = __ocl_image1d_buffer_##AMSuffix##_t; \
190+
}
191+
192+
IMAGE_BUFFER_TY_DEFINE(read, ro);
193+
IMAGE_BUFFER_TY_DEFINE(write, wo);
194+
IMAGE_BUFFER_TY_DEFINE(discard_write, wo);
195+
IMAGE_BUFFER_TY_DEFINE(read_write, rw);
196+
197+
template <> struct opencl_image1d_buffer_type<access::mode::atomic> {
198+
// static_assert(false && "atomic access not supported for image1d
199+
// buffers");
200+
// TODO this should be disabled; currently there is instantiation of this
201+
// class happenning even if atomic access not used - using dummy type
202+
// definition for now.
203+
using type = unsigned int;
204+
};
205+
#endif // __SYCL_EXPLICIT_SIMD__
206+
183207
template <int Dimensions, access::mode AccessMode, access::target AccessTarget>
184208
struct opencl_image_type;
185209

sycl/include/CL/sycl/detail/memory_manager.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ class __SYCL_EXPORT MemoryManager {
4747
std::vector<EventImplPtr> DepEvents,
4848
RT::PiEvent &OutEvent);
4949

50+
// Allocates memory buffer wrapped into an image. MemObj must be a buffer,
51+
// not an image. Used in ESIMD extension to enable surface index-based access.
52+
static void *wrapIntoImageBuffer(ContextImplPtr TargetContext, void *MemBuf,
53+
SYCLMemObjI *MemObj);
54+
55+
// Releases the image buffer created by wrapIntoImageBuffer.
56+
static void releaseImageBuffer(ContextImplPtr TargetContext, void *ImageBuf);
57+
5058
// The following method creates OpenCL sub buffer for specified
5159
// offset, range, and memory object.
5260
static void *allocateMemSubBuffer(ContextImplPtr TargetContext,

sycl/include/CL/sycl/detail/stl_type_traits.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#pragma once
1010

11+
#include <CL/sycl/detail/defines.hpp>
1112
#include <iterator>
1213
#include <memory>
1314
#include <type_traits>
@@ -35,6 +36,11 @@ using remove_reference_t = typename std::remove_reference<T>::type;
3536

3637
template <typename T> using add_pointer_t = typename std::add_pointer<T>::type;
3738

39+
template <typename T> using remove_cv_t = typename std::remove_cv<T>::type;
40+
41+
template <typename T>
42+
using remove_reference_t = typename std::remove_reference<T>::type;
43+
3844
// C++17
3945
template <bool V> using bool_constant = std::integral_constant<bool, V>;
4046

sycl/source/detail/accessor_impl.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,3 @@ void addHostAccessorAndWait(Requirement *Req) {
4040
}
4141
}
4242
}
43-

sycl/source/detail/memory_manager.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
4545
MemObj->releaseMem(TargetContext, MemAllocation);
4646
}
4747

48+
void MemoryManager::releaseImageBuffer(ContextImplPtr TargetContext,
49+
void *ImageBuf) {
50+
auto PIObj = reinterpret_cast<pi_mem>(ImageBuf);
51+
TargetContext->getPlugin().call<PiApiKind::piMemRelease>(PIObj);
52+
}
53+
4854
void MemoryManager::releaseMemObj(ContextImplPtr TargetContext,
4955
SYCLMemObjI *MemObj, void *MemAllocation,
5056
void *UserPtr) {
@@ -75,6 +81,30 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
7581
OutEvent);
7682
}
7783

84+
// Creates an image1d buffer wrapper object around given memory object.
85+
void *MemoryManager::wrapIntoImageBuffer(ContextImplPtr TargetContext,
86+
void *MemBuf, SYCLMemObjI *MemObj) {
87+
// Image format: 1 channel per pixel, each pixel 8 bit, Size pixels occupies
88+
// Size bytes.
89+
pi_image_format Format = {PI_IMAGE_CHANNEL_ORDER_R,
90+
PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8};
91+
92+
// Image descriptor - request wrapper image1d creation.
93+
pi_image_desc Desc = {};
94+
Desc.image_type = PI_MEM_TYPE_IMAGE1D_BUFFER;
95+
Desc.image_width = MemObj->getSize();
96+
Desc.buffer = reinterpret_cast<pi_mem>(MemBuf);
97+
98+
// Create the image object.
99+
const detail::plugin &Plugin = TargetContext->getPlugin();
100+
pi_mem Res = nullptr;
101+
pi_mem_flags Flags = 0;
102+
// Do not ref count the context handle, as it is not captured by the call.
103+
Plugin.call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(), Flags,
104+
&Format, &Desc, nullptr, &Res);
105+
return Res;
106+
}
107+
78108
void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
79109
bool HostPtrReadOnly, size_t Size) {
80110
// Can return user pointer directly if it points to writable memory.

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,14 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
774774
LinkDeviceLibs = false;
775775
}
776776

777+
// TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
778+
// We do not link with other device libraries, because it may fail
779+
// due to unrecognized SPIRV format of those libraries.
780+
if (std::string(LinkOpts).find(std::string("-cmc")) != std::string::npos ||
781+
std::string(LinkOpts).find(std::string("-vc-codegen")) !=
782+
std::string::npos)
783+
LinkDeviceLibs = false;
784+
777785
std::vector<RT::PiProgram> LinkPrograms;
778786
if (LinkDeviceLibs) {
779787
LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms);

sycl/source/detail/scheduler/commands.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,14 @@ cl_int AllocaCommand::enqueueImp() {
751751
detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(),
752752
MInitFromUserData, HostPtr, std::move(EventImpls), Event);
753753

754+
// if this is ESIMD accessor, wrap the allocated device memory buffer into
755+
// an image buffer object.
756+
// TODO Address copying SYCL/ESIMD memory between contexts.
757+
if (getRequirement()->MIsESIMDAcc)
758+
ESIMDExt.MWrapperImage = MemoryManager::wrapIntoImageBuffer(
759+
detail::getSyclObjImpl(MQueue->get_context()), MMemAllocation,
760+
getSYCLMemObj());
761+
754762
return CL_SUCCESS;
755763
}
756764

@@ -937,12 +945,16 @@ cl_int ReleaseCommand::enqueueImp() {
937945
RT::PiEvent &Event = MEvent->getHandleRef();
938946
if (SkipRelease)
939947
Command::waitForEvents(MQueue, EventImpls, Event);
940-
else
948+
else {
941949
MemoryManager::release(detail::getSyclObjImpl(MQueue->get_context()),
942950
MAllocaCmd->getSYCLMemObj(),
943951
MAllocaCmd->getMemAllocation(),
944952
std::move(EventImpls), Event);
945-
953+
// Release the wrapper object if present.
954+
if (void *WrapperImage = MAllocaCmd->ESIMDExt.MWrapperImage)
955+
MemoryManager::releaseImageBuffer(
956+
detail::getSyclObjImpl(MQueue->get_context()), WrapperImage);
957+
}
946958
return CL_SUCCESS;
947959
}
948960

@@ -1638,7 +1650,9 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
16381650
case kernel_param_kind_t::kind_accessor: {
16391651
Requirement *Req = (Requirement *)(Arg.MPtr);
16401652
AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
1641-
RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation();
1653+
RT::PiMem MemArg = Req->MIsESIMDAcc
1654+
? (RT::PiMem)AllocaCmd->ESIMDExt.MWrapperImage
1655+
: (RT::PiMem)AllocaCmd->getMemAllocation();
16421656
if (Plugin.getBackend() == backend::opencl) {
16431657
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex,
16441658
sizeof(RT::PiMem), &MemArg);

sycl/source/detail/scheduler/commands.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,13 @@ class AllocaCommandBase : public Command {
320320

321321
void *MMemAllocation = nullptr;
322322

323+
// ESIMD-extension-specific fields.
324+
struct {
325+
// If this alloca corresponds to an ESIMD accessor, then this field holds
326+
// an image buffer wrapping the memory allocation above.
327+
void *MWrapperImage = nullptr;
328+
} ESIMDExt;
329+
323330
/// Alloca command linked with current command.
324331
/// Device and host alloca commands can be linked, so they may share the same
325332
/// memory. Only one allocation from a pair can be accessed at a time. Alloca

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
559559

560560
const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
561561
Req->MMemoryRange, access::mode::read_write,
562-
Req->MSYCLMemObj, Req->MDims, Req->MElemSize);
562+
Req->MSYCLMemObj, Req->MDims, Req->MElemSize,
563+
0 /*ReMOffsetInBytes*/, false /*MIsSubBuffer*/,
564+
Req->MIsESIMDAcc);
563565
// Can reuse user data for the first allocation
564566
const bool InitFromUserData = Record->MAllocaCommands.empty();
565567

sycl/source/handler.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,11 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
141141
AccImpl->resize(MNDRDesc.GlobalSize.size());
142142
}
143143
MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
144-
if (!IsKernelCreatedFromSource) {
144+
145+
// TODO ESIMD currently does not suport offset, memory and access ranges -
146+
// accessor::init for ESIMD-mode accessor has a single field, translated
147+
// to a single kernel argument set above.
148+
if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) {
145149
// Dimensionality of the buffer is 1 when dimensionality of the
146150
// accessor is 0.
147151
const size_t SizeAccField =

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3736,6 +3736,8 @@ _ZN2cl4sycl6detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS
37363736
_ZN2cl4sycl6detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event
37373737
_ZN2cl4sycl6detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EERSB_
37383738
_ZN2cl4sycl6detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EERS9_
3739+
_ZN2cl4sycl6detail13MemoryManager18releaseImageBufferESt10shared_ptrINS1_12context_implEEPv
3740+
_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE
37393741
_ZN2cl4sycl6detail14getBorderColorENS0_19image_channel_orderE
37403742
_ZN2cl4sycl6detail14host_half_impl4halfC1ERKf
37413743
_ZN2cl4sycl6detail14host_half_impl4halfC2ERKf

0 commit comments

Comments
 (0)