Skip to content

Commit a5a0e12

Browse files
authored
[SYCL][COMPAT] Adding 2-byte and 4-bytes memset operations to headers (#13409)
This PR replaces #11340. It extends the memory header to include 2-byte and 4-byte memsets:
- `memset` remains unchanged.
- 2D / 3D memsets are templated and wrap `sycl::fill`. Functionality remains unchanged, as it is exposed through `detail::memset<unsigned char>`, equivalent to what we had before.
- `memset_d16` and `memset_d32` calls are added, wrapped around `sycl::fill` using 2-byte and 4-byte data types.

Tests are added for `memset_d16` and `memset_d32`.
1 parent 0e004f9 commit a5a0e12

File tree

3 files changed

+300
-78
lines changed

3 files changed

+300
-78
lines changed

sycl/include/syclcompat/memory.hpp

Lines changed: 152 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -199,23 +199,23 @@ static inline sycl::event memset(sycl::queue q, void *dev_ptr, int value,
199199
return q.memset(dev_ptr, value, size);
200200
}
201201

202-
/// Set \p value to the 3D memory region pointed by \p data in \p q. \p size
203-
/// specifies the 3D memory size to set.
204-
///
205-
/// \param q The queue in which the operation is done.
206-
/// \param data Pointer to the device memory region.
207-
/// \param value Value to be set.
208-
/// \param size Memory region size.
209-
/// \returns An event list representing the memset operations.
210-
static inline std::vector<sycl::event> memset(sycl::queue q, pitched_data data,
211-
int value, sycl::range<3> size) {
202+
/// \brief Sets \p value to the 3D memory region pointed by \p data in \p q.
203+
/// \tparam T The type of the element to be set.
204+
/// \param [in] q The queue in which the operation is done.
205+
/// \param [in] data Pointer to the pitched device memory region.
206+
/// \param [in] value The value to be set.
207+
/// \param [in] size 3D memory region by number of elements.
208+
/// \return An event list representing the memset operations.
209+
template <typename T>
210+
static inline std::vector<sycl::event>
211+
memset(sycl::queue q, pitched_data data, const T &value, sycl::range<3> size) {
212212
std::vector<sycl::event> event_list;
213213
size_t slice = data.get_pitch() * data.get_y();
214214
unsigned char *data_surface = (unsigned char *)data.get_data_ptr();
215215
for (size_t z = 0; z < size.get(2); ++z) {
216216
unsigned char *data_ptr = data_surface;
217217
for (size_t y = 0; y < size.get(1); ++y) {
218-
event_list.push_back(memset(q, data_ptr, value, size.get(0)));
218+
event_list.push_back(detail::fill<T>(q, data_ptr, value, size.get(0)));
219219
data_ptr += data.get_pitch();
220220
}
221221
data_surface += slice;
@@ -225,15 +225,18 @@ static inline std::vector<sycl::event> memset(sycl::queue q, pitched_data data,
225225

226226
/// \brief Sets \p value to the pitched 2D memory region pointed by \p ptr in \p
227227
/// q.
228+
/// \tparam T The type of the element to be set.
228229
/// \param [in] q The queue in which the operation is done.
229230
/// \param [in] ptr Pointer to the virtual device memory.
230231
/// \param [in] pitch The pitch size in bytes, including padding.
231232
/// \param [in] value The value to be set.
232233
/// \param [in] x The width of memory region by number of elements.
233234
/// \param [in] y The height of memory region by number of elements.
234235
/// \return An event list representing the memset operations.
235-
static inline std::vector<sycl::event>
236-
memset(sycl::queue q, void *ptr, size_t pitch, int value, size_t x, size_t y) {
236+
template <typename T>
237+
static inline std::vector<sycl::event> memset(sycl::queue q, void *ptr,
238+
size_t pitch, const T &value,
239+
size_t x, size_t y) {
237240
return memset(q, pitched_data(ptr, pitch, x, 1), value,
238241
sycl::range<3>(x, y, 1));
239242
}
@@ -407,8 +410,7 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
407410
}));
408411
break;
409412
default:
410-
throw std::runtime_error("[SYCLcompat]"
411-
"memcpy: invalid direction value");
413+
throw std::runtime_error("[SYCLcompat] memcpy: invalid direction value");
412414
}
413415
return event_list;
414416
}
@@ -731,7 +733,7 @@ static void inline fill(void *dev_ptr, const T &pattern, size_t count,
731733
/// \param pattern Pattern of type \p T to be set.
732734
/// \param count Number of elements to be set to the pattern.
733735
/// \param q The queue in which the operation is done.
734-
/// \returns no return value.
736+
/// \returns An event representing the fill operation.
735737
template <class T>
736738
static sycl::event inline fill_async(void *dev_ptr, const T &pattern,
737739
size_t count,
@@ -752,51 +754,151 @@ static void memset(void *dev_ptr, int value, size_t size,
752754
detail::memset(q, dev_ptr, value, size).wait();
753755
}
754756

755-
/// Asynchronously sets \p value to the first \p size bytes starting from \p
756-
/// dev_ptr. The return of the function does NOT guarantee the memset operation
757-
/// is completed.
758-
///
757+
/// \brief Sets 2 bytes data \p value to the first \p size elements starting
758+
/// from \p dev_ptr in \p q synchronously.
759+
/// \param [in] dev_ptr Pointer to the virtual device memory address.
760+
/// \param [in] value The value to be set.
761+
/// \param [in] size Number of elements to be set to the value.
762+
/// \param [in] q The queue in which the operation is done.
763+
static inline void memset_d16(void *dev_ptr, unsigned short value, size_t size,
764+
sycl::queue q = get_default_queue()) {
765+
detail::fill<unsigned short>(q, dev_ptr, value, size).wait();
766+
}
767+
768+
/// \brief Sets 4 bytes data \p value to the first \p size elements starting
769+
/// from \p dev_ptr in \p q synchronously.
770+
/// \param [in] dev_ptr Pointer to the virtual device memory address.
771+
/// \param [in] value The value to be set.
772+
/// \param [in] size Number of elements to be set to the value.
773+
/// \param [in] q The queue in which the operation is done.
774+
static inline void memset_d32(void *dev_ptr, unsigned int value, size_t size,
775+
sycl::queue q = get_default_queue()) {
776+
detail::fill<unsigned int>(q, dev_ptr, value, size).wait();
777+
}
778+
779+
/// \brief Sets 1 byte data \p value to the first \p size elements starting
780+
/// from \p dev_ptr in \p q asynchronously.
759781
/// \param dev_ptr Pointer to the device memory address.
760782
/// \param value Value to be set.
761783
/// \param size Number of bytes to be set to the value.
762-
/// \returns no return value.
763-
static sycl::event memset_async(void *dev_ptr, int value, size_t size,
764-
sycl::queue q = get_default_queue()) {
784+
/// \returns An event representing the memset operation.
785+
static inline sycl::event memset_async(void *dev_ptr, int value, size_t size,
786+
sycl::queue q = get_default_queue()) {
765787
return detail::memset(q, dev_ptr, value, size);
766788
}
767789

768-
/// Sets \p value to the 2D memory region pointed by \p ptr in \p q. \p x and
769-
/// \p y specify the setted 2D memory size. \p pitch is the bytes in linear
770-
/// dimension, including padding bytes. The function will return after the
771-
/// memset operation is completed.
772-
///
773-
/// \param ptr Pointer to the device memory region.
774-
/// \param pitch Bytes in linear dimension, including padding bytes.
775-
/// \param value Value to be set.
776-
/// \param x The setted memory size in linear dimension.
777-
/// \param y The setted memory size in second dimension.
778-
/// \param q The queue in which the operation is done.
779-
/// \returns no return value.
790+
/// \brief Sets 2 bytes data \p value to the first \p size elements starting
791+
/// from \p dev_ptr in \p q asynchronously.
792+
/// \param [in] dev_ptr Pointer to the virtual device memory address.
793+
/// \param [in] value The value to be set.
794+
/// \param [in] size Number of elements to be set to the value.
795+
/// \param [in] q The queue in which the operation is done.
796+
/// \returns An event representing the memset operation.
797+
static inline sycl::event
798+
memset_d16_async(void *dev_ptr, unsigned short value, size_t size,
799+
sycl::queue q = get_default_queue()) {
800+
return detail::fill<unsigned short>(q, dev_ptr, value, size);
801+
}
802+
803+
/// \brief Sets 4 bytes data \p value to the first \p size elements starting
804+
/// from \p dev_ptr in \p q asynchronously.
805+
/// \param [in] dev_ptr Pointer to the virtual device memory address.
806+
/// \param [in] value The value to be set.
807+
/// \param [in] size Number of elements to be set to the value.
808+
/// \param [in] q The queue in which the operation is done.
809+
/// \returns An event representing the memset operation.
810+
static inline sycl::event
811+
memset_d32_async(void *dev_ptr, unsigned int value, size_t size,
812+
sycl::queue q = get_default_queue()) {
813+
return detail::fill<unsigned int>(q, dev_ptr, value, size);
814+
}
815+
816+
/// \brief Sets 1 byte data \p val to the pitched 2D memory region pointed by \p
817+
/// ptr in \p q synchronously.
818+
/// \param [in] ptr Pointer to the virtual device memory.
819+
/// \param [in] pitch The pitch size by number of elements, including padding.
820+
/// \param [in] val The value to be set.
821+
/// \param [in] x The width of memory region by number of elements.
822+
/// \param [in] y The height of memory region by number of elements.
823+
/// \param [in] q The queue in which the operation is done.
780824
static inline void memset(void *ptr, size_t pitch, int val, size_t x, size_t y,
781825
sycl::queue q = get_default_queue()) {
826+
sycl::event::wait(detail::memset<unsigned char>(q, ptr, pitch, val, x, y));
827+
}
828+
829+
/// \brief Sets 2 bytes data \p val to the pitched 2D memory region pointed by
830+
/// \p ptr in \p q synchronously.
831+
/// \param [in] ptr Pointer to the virtual device memory.
832+
/// \param [in] pitch The pitch size in bytes, including padding.
833+
/// \param [in] val The value to be set.
834+
/// \param [in] x The width of memory region by number of elements.
835+
/// \param [in] y The height of memory region by number of elements.
836+
/// \param [in] q The queue in which the operation is done.
837+
static inline void memset_d16(void *ptr, size_t pitch, unsigned short val,
838+
size_t x, size_t y,
839+
sycl::queue q = get_default_queue()) {
782840
sycl::event::wait(detail::memset(q, ptr, pitch, val, x, y));
783841
}
784842

785-
/// Sets \p value to the 2D memory region pointed by \p ptr in \p q. \p x and
786-
/// \p y specify the setted 2D memory size. \p pitch is the bytes in linear
787-
/// dimension, including padding bytes. The return of the function does NOT
788-
/// guarantee the memset operation is completed.
789-
///
790-
/// \param ptr Pointer to the device memory region.
791-
/// \param pitch Bytes in linear dimension, including padding bytes.
792-
/// \param value Value to be set.
793-
/// \param x The setted memory size in linear dimension.
794-
/// \param y The setted memory size in second dimension.
795-
/// \param q The queue in which the operation is done.
796-
/// \returns no return value.
843+
/// \brief Sets 4 bytes data \p val to the pitched 2D memory region pointed by
844+
/// \p ptr in \p q synchronously.
845+
/// \param [in] ptr Pointer to the virtual device memory.
846+
/// \param [in] pitch The pitch size in bytes, including padding.
847+
/// \param [in] val The value to be set.
848+
/// \param [in] x The width of memory region by number of elements.
849+
/// \param [in] y The height of memory region by number of elements.
850+
/// \param [in] q The queue in which the operation is done.
851+
static inline void memset_d32(void *ptr, size_t pitch, unsigned int val,
852+
size_t x, size_t y,
853+
sycl::queue q = get_default_queue()) {
854+
sycl::event::wait(detail::memset(q, ptr, pitch, val, x, y));
855+
}
856+
857+
/// \brief Sets 1 byte data \p val to the pitched 2D memory region pointed by \p
858+
/// ptr in \p q asynchronously.
859+
/// \param [in] ptr Pointer to the virtual device memory.
860+
/// \param [in] pitch The pitch size by number of elements, including padding.
861+
/// \param [in] val The value to be set.
862+
/// \param [in] x The width of memory region by number of elements.
863+
/// \param [in] y The height of memory region by number of elements.
864+
/// \param [in] q The queue in which the operation is done.
865+
/// \returns An event representing the memset operation.
797866
static inline sycl::event memset_async(void *ptr, size_t pitch, int val,
798867
size_t x, size_t y,
799868
sycl::queue q = get_default_queue()) {
869+
870+
auto events = detail::memset<unsigned char>(q, ptr, pitch, val, x, y);
871+
return detail::combine_events(events, q);
872+
}
873+
874+
/// \brief Sets 2 bytes data \p val to the pitched 2D memory region pointed by
875+
/// \p ptr in \p q asynchronously.
876+
/// \param [in] ptr Pointer to the virtual device memory.
877+
/// \param [in] pitch The pitch size in bytes, including padding.
878+
/// \param [in] val The value to be set.
879+
/// \param [in] x The width of memory region by number of elements.
880+
/// \param [in] y The height of memory region by number of elements.
881+
/// \param [in] q The queue in which the operation is done.
882+
/// \returns An event representing the memset operation.
883+
static inline sycl::event
884+
memset_d16_async(void *ptr, size_t pitch, unsigned short val, size_t x,
885+
size_t y, sycl::queue q = get_default_queue()) {
886+
auto events = detail::memset(q, ptr, pitch, val, x, y);
887+
return detail::combine_events(events, q);
888+
}
889+
890+
/// \brief Sets 4 bytes data \p val to the pitched 2D memory region pointed by
891+
/// \p ptr in \p q asynchronously.
892+
/// \param [in] ptr Pointer to the virtual device memory.
893+
/// \param [in] pitch The pitch size in bytes, including padding.
894+
/// \param [in] val The value to be set.
895+
/// \param [in] x The width of memory region by number of elements.
896+
/// \param [in] y The height of memory region by number of elements.
897+
/// \param [in] q The queue in which the operation is done.
898+
/// \returns An event representing the memset operation.
899+
static inline sycl::event
900+
memset_d32_async(void *ptr, size_t pitch, unsigned int val, size_t x, size_t y,
901+
sycl::queue q = get_default_queue()) {
800902
auto events = detail::memset(q, ptr, pitch, val, x, y);
801903
return detail::combine_events(events, q);
802904
}
@@ -812,7 +914,7 @@ static inline sycl::event memset_async(void *ptr, size_t pitch, int val,
812914
/// \returns no return value.
813915
static inline void memset(pitched_data pitch, int val, sycl::range<3> size,
814916
sycl::queue q = get_default_queue()) {
815-
sycl::event::wait(detail::memset(q, pitch, val, size));
917+
sycl::event::wait(detail::memset<unsigned char>(q, pitch, val, size));
816918
}
817919

818920
/// Sets \p value to the 3D memory region specified by \p pitch in \p q. \p size
@@ -823,11 +925,11 @@ static inline void memset(pitched_data pitch, int val, sycl::range<3> size,
823925
/// \param val Value to be set.
824926
/// \param size The size of the 3D memory region to set.
825927
/// \param q The queue in which the operation is done.
826-
/// \returns no return value.
928+
/// \returns An event representing the memset operation.
827929
static inline sycl::event memset_async(pitched_data pitch, int val,
828930
sycl::range<3> size,
829931
sycl::queue q = get_default_queue()) {
830-
auto events = detail::memset(q, pitch, val, size);
932+
auto events = detail::memset<unsigned char>(q, pitch, val, size);
831933
return detail::combine_events(events, q);
832934
}
833935

0 commit comments

Comments
 (0)