Skip to content

Commit bc90862

Browse files
committed
SYCL: support 0-dim acc in handler::copy(accessor, accessor)
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 6c88351 commit bc90862

File tree

2 files changed

+232
-26
lines changed

2 files changed

+232
-26
lines changed

sycl/include/CL/sycl/handler.hpp

+108-26
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#pragma once
1010

1111
#include <CL/sycl/access/access.hpp>
12+
#include <CL/sycl/atomic.hpp>
1213
#include <CL/sycl/context.hpp>
1314
#include <CL/sycl/detail/cg.hpp>
1415
#include <CL/sycl/detail/export.hpp>
@@ -366,6 +367,107 @@ class __SYCL_EXPORT handler {
366367
return true;
367368
}
368369

370+
/// Handles some special cases of the copy operation from one accessor
371+
/// to another accessor. Returns true if the copy is handled here.
372+
///
373+
/// \param Src is a source SYCL accessor.
374+
/// \param Dst is a destination SYCL accessor.
375+
// TODO: support atomic accessor in Src or/and Dst.
376+
template <typename TSrc, int DimSrc, access::mode ModeSrc,
377+
access::target TargetSrc, typename TDst, int DimDst,
378+
access::mode ModeDst, access::target TargetDst,
379+
access::placeholder IsPHSrc, access::placeholder IsPHDst>
380+
detail::enable_if_t<(DimSrc > 0) && (DimDst > 0), bool>
381+
copyAccToAccHelper(accessor<TSrc, DimSrc, ModeSrc, TargetSrc, IsPHSrc> Src,
382+
accessor<TDst, DimDst, ModeDst, TargetDst, IsPHDst> Dst) {
383+
if (!MIsHost &&
384+
IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
385+
return false;
386+
387+
range<1> LinearizedRange(Src.get_count());
388+
parallel_for<class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc,
389+
TDst, DimDst, ModeDst, TargetDst,
390+
IsPHSrc, IsPHDst>>
391+
(LinearizedRange, [=](id<1> Id) {
392+
size_t Index = Id[0];
393+
id<DimSrc> SrcIndex = getDelinearizedIndex(Src.get_range(), Index);
394+
id<DimDst> DstIndex = getDelinearizedIndex(Dst.get_range(), Index);
395+
Dst[DstIndex] = Src[SrcIndex];
396+
});
397+
return true;
398+
}
399+
400+
template <typename T, int Dim, access::mode Mode, access::target Target,
401+
access::placeholder IsPH>
402+
detail::enable_if_t<Dim == 0 && Mode == access::mode::atomic, T>
403+
readFromFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Src) const {
404+
atomic<T, access::address_space::global_space> AtomicSrc = Src;
405+
return AtomicSrc.load();
406+
}
407+
408+
template <typename T, int Dim, access::mode Mode, access::target Target,
409+
access::placeholder IsPH>
410+
detail::enable_if_t<(Dim > 0) && Mode == access::mode::atomic, T>
411+
readFromFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Src) const {
412+
id<Dim> Id = getDelinearizedIndex(Src.get_range(), 0);
413+
return Src[Id].load();
414+
}
415+
416+
template <typename T, int Dim, access::mode Mode, access::target Target,
417+
access::placeholder IsPH>
418+
detail::enable_if_t<Mode != access::mode::atomic, T>
419+
readFromFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Src) const {
420+
return *(Src.get_pointer());
421+
}
422+
423+
template <typename T, int Dim, access::mode Mode, access::target Target,
424+
access::placeholder IsPH>
425+
detail::enable_if_t<Dim == 0 && Mode == access::mode::atomic, void>
426+
writeToFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Dst, T V) const {
427+
atomic<T, access::address_space::global_space> AtomicDst = Dst;
428+
AtomicDst.store(V);
429+
}
430+
431+
template <typename T, int Dim, access::mode Mode, access::target Target,
432+
access::placeholder IsPH>
433+
detail::enable_if_t<(Dim > 0) && Mode == access::mode::atomic, void>
434+
writeToFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Dst, T V) const {
435+
id<Dim> Id = getDelinearizedIndex(Dst.get_range(), 0);
436+
Dst[Id].store(V);
437+
}
438+
439+
template <typename T, int Dim, access::mode Mode, access::target Target,
440+
access::placeholder IsPH>
441+
detail::enable_if_t<Mode != access::mode::atomic, void>
442+
writeToFirstAccElement(accessor<T, Dim, Mode, Target, IsPH> Dst, T V) const {
443+
*(Dst.get_pointer()) = V;
444+
}
445+
446+
/// Handles some special cases of the copy operation from one accessor
447+
/// to another accessor. Returns true if the copy is handled here.
448+
///
449+
/// Source must have at least as many bytes as the range accessed by Dst.
450+
///
451+
/// \param Src is a source SYCL accessor.
452+
/// \param Dst is a destination SYCL accessor.
453+
template <typename TSrc, int DimSrc, access::mode ModeSrc,
454+
access::target TargetSrc, typename TDst, int DimDst,
455+
access::mode ModeDst, access::target TargetDst,
456+
access::placeholder IsPHSrc, access::placeholder IsPHDst>
457+
detail::enable_if_t<DimSrc == 0 || DimDst == 0, bool>
458+
copyAccToAccHelper(accessor<TSrc, DimSrc, ModeSrc, TargetSrc, IsPHSrc> Src,
459+
accessor<TDst, DimDst, ModeDst, TargetDst, IsPHDst> Dst) {
460+
if (!MIsHost)
461+
return false;
462+
463+
single_task<class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc,
464+
TDst, DimDst, ModeDst, TargetDst,
465+
IsPHSrc, IsPHDst>> ([=]() {
466+
writeToFirstAccElement(Dst, readFromFirstAccElement(Src));
467+
});
468+
return true;
469+
}
470+
369471
constexpr static bool isConstOrGlobal(access::target AccessTarget) {
370472
return AccessTarget == access::target::global_buffer ||
371473
AccessTarget == access::target::constant_buffer;
@@ -985,6 +1087,7 @@ class __SYCL_EXPORT handler {
9851087
///
9861088
/// \param Src is a source SYCL accessor.
9871089
/// \param Dst is a pointer to destination memory.
1090+
// TODO: support 0-dimensional and atomic accessors.
9881091
template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
9891092
access::target AccessTarget,
9901093
access::placeholder IsPlaceholder = access::placeholder::false_t>
@@ -1030,6 +1133,7 @@ class __SYCL_EXPORT handler {
10301133
///
10311134
/// \param Src is a pointer to source memory.
10321135
/// \param Dst is a destination SYCL accessor.
1136+
// TODO: support 0-dimensional and atomic accessors.
10331137
template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
10341138
access::target AccessTarget,
10351139
access::placeholder IsPlaceholder = access::placeholder::false_t>
@@ -1072,7 +1176,7 @@ class __SYCL_EXPORT handler {
10721176
/// Copies the contents of memory object accessed by Src to the memory
10731177
/// object accessed by Dst.
10741178
///
1075-
/// Source must have at least as many bytes as the range accessed by Dst.
1179+
/// Dst must have at least as many bytes as the range accessed by Src.
10761180
///
10771181
/// \param Src is a source SYCL accessor.
10781182
/// \param Dst is a destination SYCL accessor.
@@ -1093,32 +1197,10 @@ class __SYCL_EXPORT handler {
10931197
"Invalid source accessor target for the copy method.");
10941198
static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
10951199
"Invalid destination accessor target for the copy method.");
1096-
// TODO replace to get_size() when it will provide correct values.
1097-
assert(
1098-
(Dst.get_range().size() * sizeof(T_Dst) >=
1099-
Src.get_range().size() * sizeof(T_Src)) &&
1100-
"dest must have at least as many bytes as the range accessed by src.");
1101-
if (MIsHost ||
1102-
!IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range())) {
1103-
range<Dims_Src> CopyRange = Src.get_range();
1104-
size_t Range = 1;
1105-
for (size_t I = 0; I < Dims_Src; ++I)
1106-
Range *= CopyRange[I];
1107-
range<1> LinearizedRange(Range);
1108-
parallel_for< class __copyAcc2Acc< T_Src, Dims_Src, AccessMode_Src,
1109-
AccessTarget_Src, T_Dst, Dims_Dst,
1110-
AccessMode_Dst, AccessTarget_Dst,
1111-
IsPlaceholder_Src,
1112-
IsPlaceholder_Dst>>
1113-
(LinearizedRange, [=](id<1> Id) {
1114-
size_t Index = Id[0];
1115-
id<Dims_Src> SrcIndex = getDelinearizedIndex(Src.get_range(), Index);
1116-
id<Dims_Dst> DstIndex = getDelinearizedIndex(Dst.get_range(), Index);
1117-
Dst[DstIndex] = Src[SrcIndex];
1118-
});
1119-
1200+
assert(Dst.get_size() >= Src.get_size() &&
1201+
"The destination accessor does not fit the copied memory.");
1202+
if (copyAccToAccHelper(Src, Dst))
11201203
return;
1121-
}
11221204
MCGType = detail::CG::COPY_ACC_TO_ACC;
11231205

11241206
detail::AccessorBaseHost *AccBaseSrc = (detail::AccessorBaseHost *)&Src;

sycl/test/basic_tests/handler/handler_mem_op.cpp

+124
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ template <typename T> void test_copy_acc_acc();
4646
template <typename T> void test_update_host();
4747
template <typename T> void test_2D_copy_acc_acc();
4848
template <typename T> void test_3D_copy_acc_acc();
49+
template <typename T>
50+
void test_0D1D_copy_acc_acc();
51+
template <typename T>
52+
void test_0D1D_copy_acc_acc_atomic();
4953
template <typename T> void test_1D2D_copy_acc_acc();
5054
template <typename T> void test_1D3D_copy_acc_acc();
5155
template <typename T> void test_2D1D_copy_acc_acc();
@@ -140,6 +144,19 @@ int main() {
140144
test_3D_copy_acc_acc<point<float>>();
141145
}
142146

147+
// handler.copy(acc, acc) 0D to/from 1D
148+
{
149+
test_0D1D_copy_acc_acc<int>();
150+
test_0D1D_copy_acc_acc<point<int>>();
151+
test_0D1D_copy_acc_acc<point<float>>();
152+
}
153+
154+
// handler.copy(acc, acc) 0D to/from 1D where one/both acc are atomic
155+
{
156+
test_0D1D_copy_acc_acc_atomic<int>();
157+
test_0D1D_copy_acc_acc_atomic<float>();
158+
}
159+
143160
// handler.copy(acc, acc) 1D to 2D
144161
{
145162
test_1D2D_copy_acc_acc<int>();
@@ -433,6 +450,113 @@ template <typename T> void test_3D_copy_acc_acc() {
433450
}
434451
}
435452

453+
template <typename T>
454+
void test_0D1D_copy_acc_acc() {
455+
// Copy 1 element from 0-dim accessor to 1-dim accessor
456+
T Src(1), Dst(0);
457+
{
458+
buffer<T, 1> BufferFrom(&Src, range<1>(1));
459+
buffer<T, 1> BufferTo(&Dst, range<1>(1));
460+
queue Queue;
461+
Queue.submit([&](handler &Cgh) {
462+
accessor<T, 0, access::mode::read, access::target::global_buffer>
463+
AccessorFrom(BufferFrom, Cgh);
464+
accessor<T, 1, access::mode::write, access::target::global_buffer>
465+
AccessorTo(BufferTo, Cgh);
466+
Cgh.copy(AccessorFrom, AccessorTo);
467+
});
468+
}
469+
assert(Dst == 1);
470+
471+
// Copy 1 element from 1-dim accessor to 0-dim accessor
472+
Src = T(3);
473+
Dst = T(0);
474+
{
475+
buffer<T, 1> BufferFrom(&Src, range<1>(1));
476+
buffer<T, 1> BufferTo(&Dst, range<1>(1));
477+
queue Queue;
478+
Queue.submit([&](handler &Cgh) {
479+
accessor<T, 1, access::mode::read, access::target::global_buffer>
480+
AccessorFrom(BufferFrom, Cgh);
481+
accessor<T, 0, access::mode::write, access::target::global_buffer>
482+
AccessorTo(BufferTo, Cgh);
483+
Cgh.copy(AccessorFrom, AccessorTo);
484+
});
485+
}
486+
assert(Dst == 3);
487+
488+
// Copy 1 element from 0-dim accessor to 0-dim accessor
489+
Src = T(7);
490+
Dst = T(0);
491+
{
492+
buffer<T, 1> BufferFrom(&Src, range<1>(1));
493+
buffer<T, 1> BufferTo(&Dst, range<1>(1));
494+
queue Queue;
495+
Queue.submit([&](handler &Cgh) {
496+
accessor<T, 0, access::mode::read, access::target::global_buffer>
497+
AccessorFrom(BufferFrom, Cgh);
498+
accessor<T, 0, access::mode::write, access::target::global_buffer>
499+
AccessorTo(BufferTo, Cgh);
500+
Cgh.copy(AccessorFrom, AccessorTo);
501+
});
502+
}
503+
assert(Dst == 7);
504+
}
505+
506+
template <typename T>
507+
void test_0D1D_copy_acc_acc_atomic() {
508+
// Copy 1 element from 0-dim ATOMIC accessor to 1-dim accessor
509+
T Src = T(1);
510+
T Dst = T(0);
511+
{
512+
buffer<T, 1> BufferFrom(&Src, range<1>(1));
513+
buffer<T, 1> BufferTo(&Dst, range<1>(1));
514+
queue Queue;
515+
Queue.submit([&](handler &Cgh) {
516+
accessor<T, 0, access::mode::atomic, access::target::global_buffer>
517+
AccessorFrom(BufferFrom, Cgh);
518+
accessor<T, 1, access::mode::write, access::target::global_buffer>
519+
AccessorTo(BufferTo, Cgh);
520+
Cgh.copy(AccessorFrom, AccessorTo);
521+
});
522+
}
523+
assert(Dst == 1);
524+
525+
// Copy 1 element from 1-dim ATOMIC accessor to 0-dim accessor
526+
Src = T(3);
527+
Dst = T(0);
528+
{
529+
buffer<T, 1> BufferFrom(&Src, range<1>(1));
530+
buffer<T, 1> BufferTo(&Dst, range<1>(1));
531+
queue Queue;
532+
Queue.submit([&](handler &Cgh) {
533+
accessor<T, 1, access::mode::atomic, access::target::global_buffer>
534+
AccessorFrom(BufferFrom, Cgh);
535+
accessor<T, 0, access::mode::write, access::target::global_buffer>
536+
AccessorTo(BufferTo, Cgh);
537+
Cgh.copy(AccessorFrom, AccessorTo);
538+
});
539+
}
540+
assert(Dst == 3);
541+
542+
// Copy 1 element from 0-dim ATOMIC accessor to 0-dim ATOMIC accessor
543+
Src = T(7);
544+
Dst = T(0);
545+
{
546+
buffer<T, 1> BufferFrom(&Src, range<1>(1));
547+
buffer<T, 1> BufferTo(&Dst, range<1>(1));
548+
queue Queue;
549+
Queue.submit([&](handler &Cgh) {
550+
accessor<T, 0, access::mode::atomic, access::target::global_buffer>
551+
AccessorFrom(BufferFrom, Cgh);
552+
accessor<T, 0, access::mode::atomic, access::target::global_buffer>
553+
AccessorTo(BufferTo, Cgh);
554+
Cgh.copy(AccessorFrom, AccessorTo);
555+
});
556+
}
557+
assert(Dst == 7);
558+
}
559+
436560
template <typename T> void test_1D2D_copy_acc_acc() {
437561
const size_t Size = 20;
438562
std::vector<T> Data(Size);

0 commit comments

Comments
 (0)