42
42
#include < utility>
43
43
44
44
#include < sycl/builtins.hpp>
45
- #include < sycl/ext/intel/experimental/usm_properties.hpp>
46
45
#include < sycl/ext/oneapi/group_local_memory.hpp>
46
+ #include < sycl/group.hpp>
47
47
#include < sycl/usm.hpp>
48
48
49
+ #ifdef SYCL_EXT_ONEAPI_USM_DEVICE_READ_ONLY
50
+ #include < sycl/ext/intel/experimental/usm_properties.hpp>
51
+ #endif
52
+
49
53
#include < syclcompat/device.hpp>
50
54
#include < syclcompat/traits.hpp>
51
55
52
56
#if defined(__linux__)
53
57
#include < sys/mman.h>
54
58
#elif defined(_WIN64)
59
+ #ifndef NOMINMAX
55
60
#define NOMINMAX
61
+ #endif
56
62
#include < windows.h>
57
63
#else
58
64
#error "Only support Windows and Linux."
@@ -123,9 +129,8 @@ template <memory_region Memory, class T = byte_t> class memory_traits {
123
129
(Memory == memory_region::local)
124
130
? sycl::access::address_space::local_space
125
131
: sycl::access::address_space::global_space;
126
- static constexpr syclcompat::target target = (Memory == memory_region::local)
127
- ? syclcompat::target::local
128
- : syclcompat::target::device;
132
+ static constexpr target target =
133
+ (Memory == memory_region::local) ? target::local : target::device;
129
134
static constexpr sycl::access_mode mode = (Memory == memory_region::constant)
130
135
? sycl::access_mode::read
131
136
: sycl::access_mode::read_write;
@@ -136,7 +141,7 @@ template <memory_region Memory, class T = byte_t> class memory_traits {
136
141
using value_t = typename std::remove_cv_t <T>;
137
142
template <size_t Dimension = 1 >
138
143
using accessor_t =
139
- typename std::conditional_t <target == syclcompat:: target::local,
144
+ typename std::conditional_t <target == target::local,
140
145
sycl::local_accessor<T, Dimension>,
141
146
sycl::accessor<T, Dimension, mode>>;
142
147
using pointer_t = T *;
@@ -148,20 +153,27 @@ static inline void *malloc(size_t size, sycl::queue q) {
148
153
149
154
// / Calculate pitch (padded length of major dimension \p x) by rounding up to
150
155
// / multiple of 32.
151
- // / \param x The dimension to be padded
152
- // / \returns size_t representing pitched length of dimension x.
156
+ // / \param x The dimension to be padded (in bytes)
157
+ // / \returns size_t representing pitched length of dimension x (in bytes) .
153
158
static inline constexpr size_t get_pitch (size_t x) {
154
159
return ((x) + 31 ) & ~(0x1F );
155
160
}
156
161
162
+ // / \brief Malloc pitched 3D data
163
+ // / \param [out] pitch returns the calculated pitch (in bytes)
164
+ // / \param [in] x width of the allocation (in bytes)
165
+ // / \param [in] y height of the allocation
166
+ // / \param [in] z depth of the allocation
167
+ // / \param [in] q The queue in which the operation is done.
168
+ // / \returns A pointer to the allocated memory
157
169
static inline void *malloc (size_t &pitch, size_t x, size_t y, size_t z,
158
170
sycl::queue q) {
159
171
pitch = get_pitch (x);
160
172
return malloc (pitch * y * z, q);
161
173
}
162
174
163
- // / Set \p pattern to the first \p count elements of type \p T starting from \p
164
- // / dev_ptr.
175
+ // / \brief Set \p pattern to the first \p count elements of type \p T
176
+ // / starting from \p dev_ptr.
165
177
// /
166
178
// / \tparam T Datatype of the pattern to be set.
167
179
// / \param q The queue in which the operation is done.
@@ -211,10 +223,18 @@ static inline std::vector<sycl::event> memset(sycl::queue q, pitched_data data,
211
223
return event_list;
212
224
}
213
225
214
- // / memset 2D matrix with pitch.
226
+ // / \brief Sets \p val to the pitched 2D memory region pointed by \p ptr in \p
227
+ // / q.
228
+ // / \param [in] q The queue in which the operation is done.
229
+ // / \param [in] ptr Pointer to the virtual device memory.
230
+ // / \param [in] pitch The pitch size by number of elements, including padding.
231
+ // / \param [in] value The value to be set.
232
+ // / \param [in] x The width of memory region by number of elements.
233
+ // / \param [in] y The height of memory region by number of elements.
234
+ // / \return An event list representing the memset operations.
215
235
static inline std::vector<sycl::event>
216
- memset (sycl::queue q, void *ptr, size_t pitch, int val , size_t x, size_t y) {
217
- return memset (q, pitched_data (ptr, pitch, x, 1 ), val ,
236
+ memset (sycl::queue q, void *ptr, size_t pitch, int value , size_t x, size_t y) {
237
+ return memset (q, pitched_data (ptr, pitch, x, 1 ), value ,
218
238
sycl::range<3 >(x, y, 1 ));
219
239
}
220
240
@@ -387,7 +407,7 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
387
407
}));
388
408
break ;
389
409
default :
390
- throw std::runtime_error (" syclcompat:: "
410
+ throw std::runtime_error (" [SYCLcompat] "
391
411
" memcpy: invalid direction value" );
392
412
}
393
413
return event_list;
@@ -523,6 +543,7 @@ static inline void free(void *ptr, sycl::queue q = get_default_queue()) {
523
543
// / \param pointers The pointers point to the device memory requested to be
524
544
// / freed. \param events The events to be waited. \param q The sycl::queue the
525
545
// / memory relates to.
546
+ // Can't be static due to the friend declaration in the memory header.
526
547
inline sycl::event free_async (const std::vector<void *> &pointers,
527
548
const std::vector<sycl::event> &events,
528
549
sycl::queue q = get_default_queue()) {
@@ -637,7 +658,7 @@ static inline void memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr,
637
658
// / \param x Range of dim x of matrix to be copied.
638
659
// / \param y Range of dim y of matrix to be copied.
639
660
// / \param q Queue to execute the copy task.
640
- // / \returns no return value .
661
+ // / \returns An event representing the memcpy operation .
641
662
static inline sycl::event memcpy_async (void *to_ptr, size_t to_pitch,
642
663
const void *from_ptr, size_t from_pitch,
643
664
size_t x, size_t y,
@@ -676,7 +697,7 @@ static inline void memcpy(pitched_data to, sycl::id<3> to_pos,
676
697
// / \param from_pos Position of destination.
677
698
// / \param size Range of the submatrix to be copied.
678
699
// / \param q Queue to execute the copy task.
679
- // / \returns no return value .
700
+ // / \returns An event representing the memcpy operation .
680
701
static inline sycl::event memcpy_async (pitched_data to, sycl::id<3 > to_pos,
681
702
pitched_data from, sycl::id<3 > from_pos,
682
703
sycl::range<3 > size,
@@ -845,8 +866,8 @@ template <class T, memory_region Memory> class accessor<T, Memory, 2> {
845
866
using accessor_t = typename memory_t ::template accessor_t <2 >;
846
867
accessor (pointer_t data, const sycl::range<2 > &in_range)
847
868
: _data(data), _range(in_range) {}
848
- template <memory_region M = Memory>
849
- accessor (typename std::enable_if<M != memory_region::local,
869
+ template <memory_region Mem = Memory>
870
+ accessor (typename std::enable_if<Mem != memory_region::local,
850
871
const accessor_t >::type &acc)
851
872
: accessor(acc, acc.get_range()) {}
852
873
accessor (const accessor_t &acc, const sycl::range<2 > &in_range)
@@ -869,7 +890,7 @@ template <class T, memory_region Memory, size_t Dimension> class device_memory {
869
890
using accessor_t =
870
891
typename detail::memory_traits<Memory, T>::template accessor_t <Dimension>;
871
892
using value_t = typename detail::memory_traits<Memory, T>::value_t ;
872
- using compat_accessor_t = syclcompat::accessor<T, Memory, Dimension>;
893
+ using syclcompat_accessor_t = syclcompat::accessor<T, Memory, Dimension>;
873
894
874
895
device_memory (sycl::queue q = get_default_queue())
875
896
: device_memory(sycl::range<Dimension>(1 ), q) {}
@@ -886,9 +907,9 @@ template <class T, memory_region Memory, size_t Dimension> class device_memory {
886
907
}
887
908
888
909
// / Constructor of 2-D array with initializer list
889
- template <size_t D = Dimension>
910
+ template <size_t Dim = Dimension>
890
911
device_memory (
891
- const typename std::enable_if<D == 2 , sycl::range<2 >>::type &in_range,
912
+ const typename std::enable_if<Dim == 2 , sycl::range<2 >>::type &in_range,
892
913
std::initializer_list<std::initializer_list<value_t >> &&init_list,
893
914
sycl::queue q = get_default_queue())
894
915
: device_memory(in_range, q) {
@@ -919,8 +940,8 @@ template <class T, memory_region Memory, size_t Dimension> class device_memory {
919
940
// / Constructor with range
920
941
// enable_if_t SFINAE to avoid ambiguity with
921
942
// device_memory(Args... Arguments, sycl::queue q)
922
- template <class ... Args, size_t D = Dimension,
923
- typename = std::enable_if_t <sizeof ...(Args) == D >>
943
+ template <class ... Args, size_t Dim = Dimension,
944
+ typename = std::enable_if_t <sizeof ...(Args) == Dim >>
924
945
device_memory (Args... Arguments)
925
946
: device_memory(sycl::range<Dimension>(Arguments...),
926
947
get_default_queue()) {}
@@ -937,7 +958,8 @@ template <class T, memory_region Memory, size_t Dimension> class device_memory {
937
958
std::free (_host_ptr);
938
959
}
939
960
940
- // / Allocate memory with default queue, and init memory if has initial value.
961
+ // / Allocate memory with the queue specified in the constuctor, and init
962
+ // / memory if has initial value
941
963
void init () { init (_q); }
942
964
// / Allocate memory with specified queue, and init memory if has initial
943
965
// / value.
@@ -957,11 +979,10 @@ template <class T, memory_region Memory, size_t Dimension> class device_memory {
957
979
new (this ) device_memory (src, size, _q);
958
980
}
959
981
960
- // / Get memory pointer of the memory object, which is virtual pointer when
961
- // / usm is not used, and device pointer when usm is used.
982
+ // Get memory pointer of the memory object, a device USM pointer.
962
983
value_t *get_ptr () { return get_ptr (_q); }
963
- // / Get memory pointer of the memory object, which is virtual pointer when
964
- // / usm is not used, and device pointer when usm is used .
984
+
985
+ // Get memory pointer of the memory object, a device USM pointer .
965
986
value_t *get_ptr (sycl::queue q) {
966
987
init (q);
967
988
return _device_ptr;
@@ -970,18 +991,18 @@ template <class T, memory_region Memory, size_t Dimension> class device_memory {
970
991
// / Get the device memory object size in bytes.
971
992
size_t get_size () { return _size; }
972
993
973
- template <size_t D = Dimension>
974
- typename std::enable_if<D == 1 , T>::type &operator [](size_t index) {
994
+ template <size_t Dim = Dimension>
995
+ typename std::enable_if<Dim == 1 , T>::type &operator [](size_t index) {
975
996
init ();
976
997
return _device_ptr[index ];
977
998
}
978
999
979
1000
// / Get compat_accessor with dimension info for the device memory object
980
1001
// / when usm is used and dimension is greater than 1.
981
- template <size_t D = Dimension>
982
- typename std::enable_if<D != 1 , compat_accessor_t >::type
1002
+ template <size_t Dim = Dimension>
1003
+ typename std::enable_if<Dim != 1 , syclcompat_accessor_t >::type
983
1004
get_access (sycl::handler &cgh) {
984
- return compat_accessor_t ((T *)_device_ptr, _range);
1005
+ return syclcompat_accessor_t ((T *)_device_ptr, _range);
985
1006
}
986
1007
987
1008
private:
@@ -1023,10 +1044,11 @@ class device_memory<T, Memory, 0> : public device_memory<T, Memory, 1> {
1023
1044
typename detail::memory_traits<Memory, T>::template accessor_t <0 >;
1024
1045
1025
1046
// / Constructor with initial value.
1026
- device_memory (const value_t &val) : base(sycl::range<1 >(1 ), {val}) {}
1047
+ device_memory (const value_t &val, sycl::queue q = get_default_queue())
1048
+ : base(sycl::range<1 >(1 ), {val}, q) {}
1027
1049
1028
1050
// / Default constructor
1029
- device_memory () : base(1 ) {}
1051
+ device_memory (sycl::queue q = get_default_queue()) : base(1 , q ) {}
1030
1052
};
1031
1053
1032
1054
template <class T , size_t Dimension>
0 commit comments