diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index cf049fee41869..cc523c89b5ad9 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 8e6ea72e44dab71e976b3daf439073c4ef9a847b -# Merge: 1f6945ce c57c181d +# commit e2df8acd37c3c159364cfdf9dfdc1be35b71779e +# Merge: 992ff37f 3a31ffee # Author: Kenneth Benzie (Benie) -# Date: Wed Jan 29 15:54:56 2025 +0000 -# Merge pull request #2619 from martygrant/martin/memimage-info-unswitch-redo -# Move urMemImageGetInfo success test from a switch to individual test -set(UNIFIED_RUNTIME_TAG 8e6ea72e44dab71e976b3daf439073c4ef9a847b) +# Date: Fri Jan 31 10:15:03 2025 +0000 +# Merge pull request #2575 from DBDuncan/duncan/extend-copies +# [CUDA][Bindless] Add support for device to device pitched copies and host to host copies +set(UNIFIED_RUNTIME_TAG e2df8acd37c3c159364cfdf9dfdc1be35b71779e) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index d99f988d49cbb..4d1e35e05b7b9 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -805,10 +805,11 @@ public: // Simple HtoD or DtoH copy with USM device memory void ext_oneapi_copy(const void *Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &Desc, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch); - // HtoD or DtoH copy with USM device memory, using offsets, extent + // Host to device or device to host copy with USM device memory with offsets + // and extent void ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, @@ -819,13 +820,15 @@ public: sycl::range<3> HostExtent, sycl::range<3> CopyExtent); - // Simple device to device copy + // Simple device to device opaque memory to opaque memory copy void ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc); + const ext::oneapi::experimental::image_descriptor &DestImgDesc); - // Device to device copy with offsets and extent + // Device to device opaque memory to opaque memory copy with offsets and + // extent void ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset, @@ -833,7 +836,62 @@ public: ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, - sycl::range<3> CopyExtent) + sycl::range<3> CopyExtent); + + // Simple device to device opaque memory to USM copy + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch); + + // Device to device opaque memory to USM copy with offsets and extent + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent); + + // Simple device to device USM to opaque memory copy + void ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc); + + // Device to device USM to opaque memory copy with offsets and extent + void ext_oneapi_copy( + const void *Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent); + + // Simple device to device or host to host USM to USM copy + void ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch); + + // Device to device or host to host USM to USM copy with offsets and extent + void ext_oneapi_copy( + const void *Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent); }; class queue { @@ -882,16 +940,16 @@ public: event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &Desc); + const ext::oneapi::experimental::image_descriptor &SrcImgDesc); event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &Desc, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, event DepEvent); event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &Desc, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const std::vector &DepEvents); // Device to host copy with offsets and extent @@ -920,7 +978,7 @@ public: range<3> DestExtent, range<3> CopyExtent, const std::vector &DepEvents); - // Host to device OR device to host using USM device memory + // Simple host to device or device to host copy with USM device memory event ext_oneapi_copy( const void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, @@ -936,8 +994,8 @@ public: size_t DeviceRowPitch, const std::vector &DepEvents); - // Host to device OR device to host using USM device memory, - // with control over sub-region + // Host to device or device to host copy with USM device memory with offsets + // and extent event ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, void *Dest, sycl::range<3> DestOffset, @@ -960,49 +1018,215 @@ public: sycl::range<3> HostExtent, sycl::range<3> CopyExtent); - // Simple device to device copy + // Simple device to device opaque memory to opaque memory copy event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc); + const ext::oneapi::experimental::image_descriptor &DestImgDesc); event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, event DepEvent); event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, const std::vector &DepEvents); - // Device to device copy with offsets and extent - void ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, - const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, - sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DestImgDesc, - sycl::range<3> CopyExtent) - void ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, - const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, - sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DestImgDesc, - sycl::range<3> CopyExtent - event DepEvent) - void ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, - const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, - sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DestImgDesc, - sycl::range<3> CopyExtent - const std::vector &DepEvents) + // Device to device opaque memory to opaque memory copy with offsets and + // extent + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent); + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, + event DepEvent); + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, + const std::vector &DepEvents); + + // Simple device to device opaque memory to USM copy + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch); + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + event DepEvent); + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + const std::vector &DepEvents); + + // Device to device opaque memory to USM copy with offsets and extent + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent); + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent, + event DepEvent); + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent, + const std::vector &DepEvents); + + + // Simple device to device USM to opaque memory copy + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc); + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + event DepEvent); + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const std::vector &DepEvents); + + // Device to device USM to opaque memory copy with offsets and extent + event ext_oneapi_copy( + const void *Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent); + event ext_oneapi_copy( + const void *Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, + event DepEvent); + event ext_oneapi_copy( + const void *Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, + const std::vector &DepEvents); + + // Simple device to device or host to host USM to USM copy + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch); + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + event DepEvent); + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + const std::vector &DepEvents); + + // Device to device or host to host USM to USM copy with offsets and extent + event ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent); + event ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent, event DepEvent); + event ext_oneapi_copy( + const void *Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, + void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + sycl::range<3> CopyExtent, + const std::vector &DepEvents); }; } ``` @@ -2338,4 +2562,12 @@ These features still need to be handled: spec using asciidoc include. |6.5|2024-10-22| - Allow 3-channel image formats on some backends. |6.6|2025-01-20| - Clarify support for the specific types of USM allocations. +|6.7|2025-01-27| - Update `image_mem_handle` to `image_mem_handle` copies to + accept two image descriptors. + - Add support for USM to `image_mem_handle` copies and + sub-copies. + - Add support for `image_mem_handle` to USM copies and + sub-copies. + - Add support for USM to USM copies and sub-copies. + - Add support for host to host copies and sub-copies. |====================== diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 2b4e7a5b2aefe..d88b9c7ea1751 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1571,41 +1571,63 @@ inline event queue::ext_oneapi_copy( } inline event queue::ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + const void *Src, sycl::range<3> SrcOffset, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, sycl::range<3> HostExtent, sycl::range<3> CopyExtent, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); - CGH.ext_oneapi_copy(Src, Dest, ImageDesc); + CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, + DeviceRowPitch, HostExtent, CopyExtent); }, TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + const void *Src, void *Dest, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, const std::vector &DepEvents, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, sycl::range<3> HostExtent, sycl::range<3> CopyExtent, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.ext_oneapi_copy(Src, Dest, ImageDesc); + CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, + DeviceRowPitch, HostExtent, CopyExtent); }, TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, - const detail::code_location &CodeLoc) { + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( - [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, ImageDesc); }, + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcImgDesc, Dest, DestImgDesc); + }, TlsCodeLocCapture.query()); } @@ -1622,7 +1644,22 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, DestImgDesc, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const std::vector &DepEvents, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, SrcImgDesc, Dest, DestImgDesc); + }, + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1640,7 +1677,21 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, DestImgDesc, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcImgDesc, Dest, DestImgDesc); + }, + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1658,51 +1709,294 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, DestImgDesc, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( - const void *Src, sycl::range<3> SrcOffset, void *Dest, + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcImgDesc, Dest, DestImgDesc, DestRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, sycl::range<3> HostExtent, sycl::range<3> CopyExtent, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, + DestImgDesc, DestRowPitch, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, event DepEvent, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcImgDesc, Dest, DestImgDesc, DestRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, + DestImgDesc, DestRowPitch, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, const std::vector &DepEvents, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, SrcImgDesc, Dest, DestImgDesc, DestRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const std::vector &DepEvents, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, + DestImgDesc, DestRowPitch, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcImgDesc, SrcRowPitch, Dest, DestImgDesc); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, SrcRowPitch, Dest, + DestOffset, DestImgDesc, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); - CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, - DeviceRowPitch, HostExtent, CopyExtent); + CGH.ext_oneapi_copy(Src, SrcImgDesc, SrcRowPitch, Dest, DestImgDesc); }, TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( - const void *Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, const std::vector &DepEvents, + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, SrcRowPitch, Dest, + DestOffset, DestImgDesc, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const std::vector &DepEvents, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch); + CGH.ext_oneapi_copy(Src, SrcImgDesc, SrcRowPitch, Dest, DestImgDesc); }, TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( - const void *Src, sycl::range<3> SrcOffset, void *Dest, + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, sycl::range<3> HostExtent, sycl::range<3> CopyExtent, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, const std::vector &DepEvents, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, SrcRowPitch, Dest, + DestOffset, DestImgDesc, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcImgDesc, SrcRowPitch, Dest, DestImgDesc, + DestRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, SrcRowPitch, Dest, + DestOffset, DestImgDesc, DestRowPitch, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, event DepEvent, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcImgDesc, SrcRowPitch, Dest, DestImgDesc, + DestRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, SrcRowPitch, Dest, + DestOffset, DestImgDesc, DestRowPitch, CopyExtent); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, const std::vector &DepEvents, + const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.ext_oneapi_copy(Src, SrcImgDesc, SrcRowPitch, Dest, DestImgDesc, + DestRowPitch); + }, + TlsCodeLocCapture.query()); +} + +inline event queue::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); - CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, - DeviceRowPitch, HostExtent, CopyExtent); + CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, SrcRowPitch, Dest, + DestOffset, DestImgDesc, DestRowPitch, CopyExtent); }, TlsCodeLocCapture.query()); } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 552c6afe195be..a76d6002d9d87 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3032,38 +3032,38 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::graph_state::executable> Graph); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle. An exception is - /// thrown if either \p Src is nullptr or \p Dest is incomplete. The behavior - /// is undefined if \p Desc is inconsistent with the allocated memory region. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. An exception is thrown if either \p Src + /// is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p DestImgDesc is inconsistent with the allocated allocated memory + /// regions. /// /// \param Src is a USM pointer to the source memory. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param DestImgDesc is the image descriptor + /// \param DestImgDesc is the image descriptor. void ext_oneapi_copy( const void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. Pixel size is determined - /// by \p DestImgDesc - /// An exception is thrown if either \p Src is nullptr or \p Dest is - /// incomplete. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DestImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is a USM pointer to the source memory. /// \param SrcOffset is an offset from the origin where the x, y, and z /// components are measured in bytes, rows, and slices - /// respectively - /// \param SrcExtent is the extent of the source memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// respectively. + /// \param SrcExtent is the size of the source memory, measured in + /// pixels. (Pixel size determined by \p DestImgDesc .) /// \param Dest is an opaque image memory handle to the destination memory. /// \param DestOffset is an offset from the destination origin measured in - /// pixels (pixel size determined by \p DestImgDesc ) - /// \param DestImgDesc is the destination image descriptor + /// pixels. (Pixel size determined by \p DestImgDesc .) + /// \param DestImgDesc is the destination image descriptor. /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels as determined by \p DestImgDesc + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) void ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, @@ -3071,40 +3071,37 @@ class __SYCL_EXPORT handler { const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. - /// An exception is thrown if either \p Src is incomplete or \p Dest is - /// nullptr. The behavior is undefined if \p Desc is inconsistent with the - /// allocated memory region. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc is inconsistent with the allocated memory regions. /// /// \param Src is an opaque image memory handle to the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param SrcImgDesc is the source image descriptor + /// \param SrcImgDesc is the source image descriptor. void ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p Extent are - /// used to determine the sub-region. Pixel size is determined - /// by \p SrcImgDesc - /// An exception is thrown if either \p Src is nullptr or \p Dest is - /// incomplete. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the source origin measured in pixels - /// (pixel size determined by \p SrcImgDesc ) - /// \param SrcImgDesc is the source image descriptor + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is a USM pointer to the destination memory. /// \param DestOffset is an offset from the destination origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DestExtent is the extent of the dest memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DestExtent is the size of the destination memory, measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels (pixel size determined by - /// \p SrcImgDesc ) + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) void ext_oneapi_copy(const ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset, @@ -3112,87 +3109,232 @@ class __SYCL_EXPORT handler { void *Dest, sycl::range<3> DestOffset, sycl::range<3> DestExtent, sycl::range<3> CopyExtent); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. An exception is thrown if either \p Src is nullptr, \p - /// Dest is nullptr, or \p Pitch is inconsistent with hardware requirements. - /// The behavior is undefined if \p Desc is inconsistent with the allocated - /// memory region. + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. An exception is thrown if either \p Src is + /// nullptr or \p Dest is nullptr. The behavior is undefined if + /// \p DeviceImgDesc is inconsistent with the allocated memory regions or + /// \p DeviceRowPitch is inconsistent with hardware requirements. /// /// \param Src is a USM pointer to the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DeviceImgDesc is the image descriptor (format, order, dimensions). - /// \param DeviceRowPitch is the pitch of the rows on the device. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the memory on the + /// device. void ext_oneapi_copy( const void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. - /// An exception is thrown if either \p Src or \p Dest is incomplete + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DeviceImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is nullptr. The behavior is + /// undefined if \p DeviceRowPitch is inconsistent with hardware requirements + /// or \p HostExtent is inconsistent with its respective memory region. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an destination offset from the origin where the + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an destination offset from the origin where the + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the image on the device. + /// \param HostExtent is the size of the host memory, measured in pixels. + /// (Pixel size determined by \p DeviceImgDesc .) + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p DeviceImgDesc .) + void ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, sycl::range<3> HostExtent, + sycl::range<3> CopyExtent); + + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. An exception is thrown if either \p Src or \p Dest + /// are incomplete. The behavior is undefined if \p SrcImgDesc or + /// \p DestImgDesc are inconsistent with their respective allocated memory + /// regions. /// /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param ImageDesc is the source image descriptor - void - ext_oneapi_copy(const ext::oneapi::experimental::image_mem_handle Src, - ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc); + /// \param DestImgDesc is the destination image descriptor. + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc); + + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. Allows for a sub-region copy, where \p SrcOffset , + /// \p DestOffset and \p CopyExtent are used to determine the sub-region. + /// Pixel size is determined by \p SrcImgDesc . An exception is thrown if + /// either \p Src or \p Dest is incomplete. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent); + + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p DestRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. Allows for a sub-region copy, where + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the - /// sub-region. Pixel size is determined by \p SrcImgDesc - /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is incomplete or \p Dest is nullptr. The behavior + /// is undefined if \p DestRowPitch is inconsistent with hardware + /// requirements. /// /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in Pixels + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + void ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent); + + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. An exception is thrown if either + /// \p Src is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p SrcRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + void ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc); + + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. Allows for a sub-region + /// copy, where \p SrcOffset, \p DestOffset and \p CopyExtent are used to + /// determine the sub-region. Pixel size is determined by \p SrcImgDesc . An + /// exception is thrown if either \p Src is nullptr or \p Dest is incomplete. + /// The behavior is undefined if \p SrcRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. /// \param SrcOffset is an offset from the source origin measured in pixels /// (pixel size determined by \p SrcImgDesc ) /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param Dest is an opaque image memory handle to the destination memory. /// \param DestOffset is an offset from the destination origin measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// pixels (pixel size determined by \p SrcImgDesc ) /// \param DestImgDesc is the destination image descriptor /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by /// \p SrcImgDesc ) void ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, + const void *Src, sycl::range<3> SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. Allows for a sub-region copy, where \p SrcOffset , - /// \p DestOffset , and \p Extent are used to determine the sub-region. - /// Pixel size is determined by \p DestImgDesc - /// An exception is thrown if either \p Src is nullptr or \p Dest is - /// incomplete. + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. An exception is thrown if either \p Src or \p Dest are nullptr. + /// The behavior is undefined if \p SrcImgDesc or \p DestImgDesc are + /// inconsistent with their respective allocated memory regions or + /// \p SrcRowPitch or \p DestRowPitch are inconsistent with hardware + /// requirements. /// /// \param Src is a USM pointer to the source memory. - /// \param SrcOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DestOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DeviceImgDesc is the device image descriptor - /// \param DeviceRowPitch is the row pitch on the device - /// \param HostExtent is the extent of the dest memory to copy, measured in - /// pixels (pixel size determined by \p DeviceImgDesc ) + /// \param DestImgDesc is the destination image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the destination memory. + void ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch); + + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. Allows for a sub-region copy, where \p SrcOffset, \p DestOffset + /// and \p CopyExtent are used to determine the sub-region. Pixel size is + /// determined by \p SrcImgDesc . An exception is thrown if either \p Src or + /// \p Dest are nullptr. The behavior is undefined if \p SrcRowPitch or + /// \p DestRowPitch are inconsistent with hardware requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param DestRowPitch is the pitch of the rows of the destination memory. /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by - /// \p DeviceImgDesc ) + /// \p SrcImgDesc ) void ext_oneapi_copy( - const void *Src, sycl::range<3> SrcOffset, void *Dest, - sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, sycl::range<3> HostExtent, - sycl::range<3> CopyExtent); + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent); /// Submit a non-blocking device-side wait on an external // semaphore to the queue. diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 26f2d2af949b9..9c83bc3cac5ba 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -1377,40 +1377,40 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { StartIndex * sizeof(std::remove_all_extents_t)); } - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle wrapper. An exception - /// is thrown if either \p Src is nullptr or \p Dest is incomplete. The - /// behavior is undefined if \p DestImgDesc is inconsistent with the allocated - /// memory region. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. An exception is thrown if either \p Src + /// is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p DestImgDesc is inconsistent with the allocated allocated memory + /// regions. /// /// \param Src is a USM pointer to the source memory. - /// \param Dest is a wrapper for an opaque image memory handle to the - /// destination memory. - /// \param DestImgDesc is the image descriptor (format, order, dimensions). + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the image descriptor. /// \return an event representing the copy operation. event ext_oneapi_copy( const void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. An exception is thrown if either \p - /// Src is nullptr or \p CopyExtent is incomplete. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DestImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is a USM pointer to the source memory. /// \param SrcOffset is an offset from the origin where the x, y, and z /// components are measured in bytes, rows, and slices - /// respectively - /// \param SrcExtent is the extent of the source memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// respectively. + /// \param SrcExtent is the size of the source memory, measured in + /// pixels. (Pixel size determined by \p DestImgDesc .) /// \param Dest is an opaque image memory handle to the destination memory. /// \param DestOffset is an offset from the destination origin measured in - /// pixels (pixel size determined by \p DestImgDesc ) - /// \param DestImgDesc is the destination image descriptor + /// pixels. (Pixel size determined by \p DestImgDesc .) + /// \param DestImgDesc is the destination image descriptor. /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels as determined by \p DestImgDesc + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) /// \return an event representing the copy operation. event ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent, @@ -1420,16 +1420,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle wrapper. An exception - /// is thrown if either \p Src is nullptr or \p Dest is incomplete. The - /// behavior is undefined if \p DestImgDesc is inconsistent with the allocated - /// memory region. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. An exception is thrown if either \p Src + /// is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p DestImgDesc is inconsistent with the allocated allocated memory + /// regions. /// /// \param Src is a USM pointer to the source memory. - /// \param Dest is a wrapper for an opaque image memory handle to the - /// destination memory. - /// \param DestImgDesc is the destination image descriptor + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the image descriptor. /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( @@ -1438,24 +1437,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. An exception is thrown if either \p - /// Src is nullptr or \p Dest is incomplete. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DestImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is a USM pointer to the source memory. /// \param SrcOffset is an offset from the origin where the x, y, and z /// components are measured in bytes, rows, and slices - /// respectively - /// \param SrcExtent is the extent of the source memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// respectively. + /// \param SrcExtent is the size of the source memory, measured in + /// pixels. (Pixel size determined by \p DestImgDesc .) /// \param Dest is an opaque image memory handle to the destination memory. /// \param DestOffset is an offset from the destination origin measured in - /// pixels (pixel size determined by \p DestImgDesc ) - /// \param DestImgDesc is the destination image descriptor + /// pixels. (Pixel size determined by \p DestImgDesc .) + /// \param DestImgDesc is the destination image descriptor. /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels as determined by \p DestImgDesc + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( @@ -1466,16 +1466,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle wrapper. An exception - /// is thrown if either \p Src is nullptr or \p Dest is incomplete. The - /// behavior is undefined if \p DestImgDesc is inconsistent with the allocated - /// memory region. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. An exception is thrown if either \p Src + /// is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p DestImgDesc is inconsistent with the allocated allocated memory + /// regions. /// /// \param Src is a USM pointer to the source memory. - /// \param Dest is a wrapper for an opaque image memory handle to the - /// destination memory. - /// \param DestImgDesc is the destination image descriptor + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the image descriptor. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the copy operation. @@ -1485,24 +1484,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is a USM - /// pointer and \p Dest is an opaque image memory handle. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. An exception is thrown if either \p - /// Src is nullptr or \p Dest is incomplete. + /// Copies data from host to device, where \p Src is a USM pointer and \p Dest + /// is an opaque image memory handle. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DestImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is a USM pointer to the source memory. /// \param SrcOffset is an offset from the origin where the x, y, and z /// components are measured in bytes, rows, and slices - /// respectively - /// \param SrcExtent is the extent of the source memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// respectively. + /// \param SrcExtent is the size of the source memory, measured in + /// pixels. (Pixel size determined by \p DestImgDesc .) /// \param Dest is an opaque image memory handle to the destination memory. /// \param DestOffset is an offset from the destination origin measured in - /// pixels (pixel size determined by \p DestImgDesc ) - /// \param DestImgDesc is the destination image descriptor + /// pixels. (Pixel size determined by \p DestImgDesc .) + /// \param DestImgDesc is the destination image descriptor. /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels as determined by \p DestImgDesc + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the copy operation. @@ -1514,11 +1514,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. - /// An exception is thrown if either \p Src is incomplete or \p Dest is - /// nullptr. The behavior is undefined if \p SrcImgDesc is inconsistent with - /// the allocated memory region. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc is inconsistent with the allocated memory regions. /// /// \param Src is an opaque image memory handle to the source memory. /// \param Dest is a USM pointer to the destination memory. @@ -1529,26 +1528,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. Pixel size is determined by \p - /// SrcImgDesc An exception is thrown if either \p Src is nullptr or \p Dest - /// is incomplete. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels - /// (pixel size determined by \p SrcImgDesc ) - /// \param SrcImgDesc is the source image descriptor + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is a USM pointer to the destination memory. /// \param DestOffset is an offset from the destination origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DestExtent is the extent of the dest memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DestExtent is the size of the destination memory, measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels (pixel size determined by - /// \p SrcImgDesc ) + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) /// \return an event representing the copy operation. event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, @@ -1558,15 +1556,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. - /// An exception is thrown if either \p Src is incomplete or \p Dest is - /// nullptr. The behavior is undefined if \p SrcImgDesc is inconsistent with - /// the allocated memory region. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc is inconsistent with the allocated memory regions. /// /// \param Src is an opaque image memory handle to the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param SrcImgDesc is the image descriptor (format, order, dimensions). + /// \param SrcImgDesc is the source image descriptor. /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( @@ -1575,26 +1572,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. Pixel size is determined by \p - /// SrcImgDesc An exception is thrown if either \p Src is nullptr or \p Dest - /// is incomplete. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels - /// (pixel size determined by \p SrcImgDesc ) - /// \param SrcImgDesc is the source image descriptor + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is a USM pointer to the destination memory. /// \param DestOffset is an offset from the destination origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DestExtent is the extent of the dest memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DestExtent is the size of the destination memory, measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels (pixel size determined by - /// \p SrcImgDesc ) + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( @@ -1605,15 +1601,14 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. - /// An exception is thrown if either \p Src is incomplete or \p Dest is - /// nullptr. The behavior is undefined if \p SrcImgDesc is inconsistent with - /// the allocated memory region. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc is inconsistent with the allocated memory regions. /// /// \param Src is an opaque image memory handle to the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param SrcImgDesc is the image descriptor (format, order, dimensions). + /// \param SrcImgDesc is the source image descriptor. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the copy operation. @@ -1623,26 +1618,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src is an opaque - /// image memory handle and \p Dest is a USM pointer. Allows for a - /// sub-region copy, where \p SrcOffset , \p DestOffset , and \p CopyExtent - /// are used to determine the sub-region. Pixel size is determined by \p - /// SrcImgDesc An exception is thrown if either \p Src is nullptr or \p Dest - /// is incomplete. + /// Copies data from device to host, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is incomplete. /// /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels - /// (pixel size determined by \p SrcImgDesc ) - /// \param SrcImgDesc is the source image descriptor + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is a USM pointer to the destination memory. /// \param DestOffset is an offset from the destination origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DestExtent is the extent of the dest memory to copy, measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DestExtent is the size of the destination memory, measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels (pixel size determined by - /// \p SrcImgDesc ) + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the copy operation. @@ -1654,16 +1648,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. An exception is thrown if either \p Src is nullptr, \p - /// Dest is nullptr, or \p Pitch is inconsistent with hardware requirements. - /// The behavior is undefined if \p DeviceImgDesc is inconsistent with the - /// allocated memory region. + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. An exception is thrown if either \p Src is + /// nullptr or \p Dest is nullptr. The behavior is undefined if + /// \p DeviceImgDesc is inconsistent with the allocated memory regions or + /// \p DeviceRowPitch is inconsistent with hardware requirements. /// /// \param Src is a USM pointer to the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DeviceImgDesc is the image descriptor - /// \param DeviceRowPitch is the DeviceRowPitch of the rows on the device. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the memory on the + /// device. /// \return an event representing the copy operation. event ext_oneapi_copy( const void *Src, void *Dest, @@ -1671,27 +1666,29 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { size_t DeviceRowPitch, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. Allows for a sub-region copy, where \p SrcOffset , - /// \p DestOffset , and \p Extent are used to determine the sub-region. - /// An exception is thrown if either \p Src is nullptr or \p Dest is - /// incomplete. + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DeviceImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is nullptr. The behavior is + /// undefined if \p DeviceRowPitch is inconsistent with hardware requirements + /// or \p HostExtent is inconsistent with its respective memory region. /// /// \param Src is a USM pointer to the source memory. /// \param SrcOffset is an destination offset from the origin where the /// x, y, and z components are measured in bytes, rows, - /// and slices respectively + /// and slices respectively. /// \param Dest is a USM pointer to the destination memory. /// \param DestOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DeviceImgDesc is the device image descriptor - /// \param DeviceRowPitch is the row pitch on the device - /// \param HostExtent is the extent of the host memory to copy, measured in - /// pixels (pixel size determined by \p DeviceImgDesc ) + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the image on the device. + /// \param HostExtent is the size of the host memory, measured in pixels. + /// (Pixel size determined by \p DeviceImgDesc .) /// \param CopyExtent is the width, height, and depth of the region to copy - /// measured in pixels (pixel size determined by - /// \p DeviceImgDesc ) + /// measured in pixels. (Pixel size determined by + /// \p DeviceImgDesc .) /// \return an event representing the copy operation. event ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, void *Dest, @@ -1701,16 +1698,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { sycl::range<3> CopyExtent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. An exception is thrown if either \p Src is nullptr, \p - /// Dest is nullptr, or \p Pitch is inconsistent with hardware requirements. - /// The behavior is undefined if \p DeviceImgDesc is inconsistent with the - /// allocated memory region. + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. An exception is thrown if either \p Src is + /// nullptr or \p Dest is nullptr. The behavior is undefined if + /// \p DeviceImgDesc is inconsistent with the allocated memory regions or + /// \p DeviceRowPitch is inconsistent with hardware requirements. /// /// \param Src is a USM pointer to the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DeviceImgDesc is the image descriptor - /// \param DeviceRowPitch is the pitch of the rows on the device. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the memory on the + /// device. /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( @@ -1719,94 +1717,484 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { size_t DeviceRowPitch, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. - /// An exception is thrown if either \p Src or \p Dest is incomplete + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DeviceImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is nullptr. The behavior is + /// undefined if \p DeviceRowPitch is inconsistent with hardware requirements + /// or \p HostExtent is inconsistent with its respective memory region. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an destination offset from the origin where the + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an destination offset from the origin where the + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the image on the device. + /// \param HostExtent is the size of the host memory, measured in pixels. + /// (Pixel size determined by \p DeviceImgDesc .) + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p DeviceImgDesc .) + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, sycl::range<3> HostExtent, + sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. An exception is thrown if either \p Src is + /// nullptr or \p Dest is nullptr. The behavior is undefined if + /// \p DeviceImgDesc is inconsistent with the allocated memory regions or + /// \p DeviceRowPitch is inconsistent with hardware requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param Dest is a USM pointer to the destination memory. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the memory on the + /// device. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, void *Dest, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, const std::vector &DepEvents, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from host to device or device to host, where \p Src and + /// \p Dest are USM pointers. Allows for a sub-region copy, where + /// \p SrcOffset , \p DestOffset , and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p DeviceImgDesc . An exception is + /// thrown if either \p Src is nullptr or \p Dest is nullptr. The behavior is + /// undefined if \p DeviceRowPitch is inconsistent with hardware requirements + /// or \p HostExtent is inconsistent with its respective memory region. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an destination offset from the origin where the + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an destination offset from the origin where the + /// x, y, and z components are measured in bytes, rows, + /// and slices respectively. + /// \param DeviceImgDesc is the device image descriptor. + /// \param DeviceRowPitch is the pitch of the rows of the image on the device. + /// \param HostExtent is the size of the host memory, measured in pixels. + /// (Pixel size determined by \p DeviceImgDesc .) + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p DeviceImgDesc .) + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, sycl::range<3> HostExtent, + sycl::range<3> CopyExtent, const std::vector &DepEvents, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. An exception is thrown if either \p Src or \p Dest + /// are incomplete. The behavior is undefined if \p SrcImgDesc or + /// \p DestImgDesc are inconsistent with their respective allocated memory + /// regions. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. Allows for a sub-region copy, where \p SrcOffset , + /// \p DestOffset and \p CopyExtent are used to determine the sub-region. + /// Pixel size is determined by \p SrcImgDesc . An exception is thrown if + /// either \p Src or \p Dest is incomplete. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. An exception is thrown if either \p Src or \p Dest + /// are incomplete. The behavior is undefined if \p SrcImgDesc or + /// \p DestImgDesc are inconsistent with their respective allocated memory + /// regions. /// /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param ImageDesc is the source image descriptor + /// \param DestImgDesc is the destination image descriptor. /// \param DepEvent is an events that specifies the kernel dependency. /// \return an event representing the copy operation. event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. - /// An exception is thrown if either \p Src or \p Dest is incomplete + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. Allows for a sub-region copy, where \p SrcOffset , + /// \p DestOffset and \p CopyExtent are used to determine the sub-region. + /// Pixel size is determined by \p SrcImgDesc . An exception is thrown if + /// either \p Src or \p Dest is incomplete. /// /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param ImageDesc is the source image descriptor + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + ext::oneapi::experimental::image_mem_handle Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. An exception is thrown if either \p Src or \p Dest + /// are incomplete. The behavior is undefined if \p SrcImgDesc or + /// \p DestImgDesc are inconsistent with their respective allocated memory + /// regions. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the destination image descriptor. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. - /// An exception is thrown if either \p Src or \p Dest is incomplete + /// Copies data from device to device, where \p Src and \p Dest are opaque + /// image memory handles. Allows for a sub-region copy, where \p SrcOffset , + /// \p DestOffset and \p CopyExtent are used to determine the sub-region. + /// Pixel size is determined by \p SrcImgDesc . An exception is thrown if + /// either \p Src or \p Dest is incomplete. /// /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels. + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param ImageDesc is the source image descriptor + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> CopyExtent, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. Allows for a sub-region copy, where + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p DestRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the - /// sub-region. Pixel size is determined by \p SrcImgDesc - /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is incomplete or \p Dest is nullptr. The behavior + /// is undefined if \p DestRowPitch is inconsistent with hardware + /// requirements. /// /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels + /// \param SrcOffset is an offset from the source origin measured in Pixels + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p DestRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where + /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is incomplete or \p Dest is nullptr. The behavior + /// is undefined if \p DestRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in Pixels + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. An exception is thrown if either + /// \p Src is incomplete or \p Dest is nullptr. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p DestRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, const std::vector &DepEvents, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device, where \p Src is an opaque image memory + /// handle and \p Dest is a USM pointer. Allows for a sub-region copy, where + /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the + /// sub-region. Pixel size is determined by \p SrcImgDesc . An exception is + /// thrown if either \p Src is incomplete or \p Dest is nullptr. The behavior + /// is undefined if \p DestRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is an opaque image memory handle to the source memory. + /// \param SrcOffset is an offset from the source origin measured in Pixels + /// (Pixel size determined by \p SrcImgDesc .) + /// \param SrcImgDesc is the source image descriptor + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels. (Pixel size determined by \p SrcImgDesc .) + /// \param DestImgDesc is the destination image descriptor. + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels. (Pixel size determined by + /// \p SrcImgDesc .) + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const std::vector &DepEvents, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. An exception is thrown if either + /// \p Src is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p SrcRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. Allows for a sub-region + /// copy, where \p SrcOffset, \p DestOffset and \p CopyExtent are used to + /// determine the sub-region. Pixel size is determined by \p SrcImgDesc . An + /// exception is thrown if either \p Src is nullptr or \p Dest is incomplete. + /// The behavior is undefined if \p SrcRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels /// (pixel size determined by \p SrcImgDesc ) /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param DestOffset is an offset from the origin of destination measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) /// \param DestImgDesc is the destination image descriptor /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by /// \p SrcImgDesc ) /// \return an event representing the copy operation. event ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, + const void *Src, sycl::range<3> SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. Allows for a sub-region copy, where - /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the - /// sub-region. Pixel size is determined by \p SrcImgDesc - /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. An exception is thrown if either + /// \p Src is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p SrcRowPitch is inconsistent with hardware + /// requirements. /// - /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels + /// \param Src is a USM pointer to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. Allows for a sub-region + /// copy, where \p SrcOffset, \p DestOffset and \p CopyExtent are used to + /// determine the sub-region. Pixel size is determined by \p SrcImgDesc . An + /// exception is thrown if either \p Src is nullptr or \p Dest is incomplete. + /// The behavior is undefined if \p SrcRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels /// (pixel size determined by \p SrcImgDesc ) /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param DestOffset is an offset from the origin of destination measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) /// \param DestImgDesc is the destination image descriptor /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by @@ -1814,28 +2202,53 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, + const void *Src, sycl::range<3> SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from device to device memory, where \p Src and \p Dest - /// are opaque image memory handles. Allows for a sub-region copy, where - /// \p SrcOffset, \p DestOffset and \p CopyExtent are used to determine the - /// sub-region. Pixel size is determined by \p SrcImgDesc - /// An exception is thrown if either \p Src or \p Dest is incomplete. + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. An exception is thrown if either + /// \p Src is nullptr or \p Dest is incomplete. The behavior is undefined if + /// \p SrcImgDesc or \p DestImgDesc are inconsistent with their respective + /// allocated memory regions or \p SrcRowPitch is inconsistent with hardware + /// requirements. /// - /// \param Src is an opaque image memory handle to the source memory. - /// \param SrcOffset is an offset from the origin of source measured in pixels + /// \param Src is a USM pointer to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. + /// \param Dest is an opaque image memory handle to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + const std::vector &DepEvents, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from device to device memory, where \p Src is USM pointer and + /// \p Dest is an opaque image memory handle. Allows for a sub-region + /// copy, where \p SrcOffset, \p DestOffset and \p CopyExtent are used to + /// determine the sub-region. Pixel size is determined by \p SrcImgDesc . An + /// exception is thrown if either \p Src is nullptr or \p Dest is incomplete. + /// The behavior is undefined if \p SrcRowPitch is inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels /// (pixel size determined by \p SrcImgDesc ) - /// \param srcImgDesc is the source image descriptor + /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param Dest is an opaque image memory handle to the destination memory. - /// \param DestOffset is an offset from the origin of destination measured in - /// pixels (pixel size determined by \p DestImgDesc ) + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) /// \param DestImgDesc is the destination image descriptor /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by @@ -1844,95 +2257,172 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, - sycl::range<3> SrcOffset, + const void *Src, sycl::range<3> SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, - ext::oneapi::experimental::image_mem_handle Dest, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. Allows for a sub-region copy, where \p SrcOffset , - /// \p DestOffset , and \p Extent are used to determine the sub-region. - /// An exception is thrown if either \p Src is nullptr or \p Dest is - /// incomplete. + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. An exception is thrown if either \p Src or \p Dest are nullptr. + /// The behavior is undefined if \p SrcImgDesc or \p DestImgDesc are + /// inconsistent with their respective allocated memory regions or + /// \p SrcRowPitch or \p DestRowPitch are inconsistent with hardware + /// requirements. /// /// \param Src is a USM pointer to the source memory. - /// \param SrcOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DestOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DeviceImgDesc is the destination image descriptor - /// \param DeviceRowPitch is the row pitch on the device - /// \param HostExtent is the extent of the host memory to copy, measured in - /// pixels (pixel size determined by \p DeviceImgDesc ) + /// \param DestImgDesc is the destination image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the destination memory. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. Allows for a sub-region copy, where \p SrcOffset, \p DestOffset + /// and \p CopyExtent are used to determine the sub-region. Pixel size is + /// determined by \p SrcImgDesc . An exception is thrown if either \p Src or + /// \p Dest are nullptr. The behavior is undefined if \p SrcRowPitch or + /// \p DestRowPitch are inconsistent with hardware requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param DestRowPitch is the pitch of the rows of the destination memory. /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by - /// \p DeviceImgDesc ) + /// \p SrcImgDesc ) + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. An exception is thrown if either \p Src or \p Dest are nullptr. + /// The behavior is undefined if \p SrcImgDesc or \p DestImgDesc are + /// inconsistent with their respective allocated memory regions or + /// \p SrcRowPitch or \p DestRowPitch are inconsistent with hardware + /// requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestImgDesc is the destination image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param DepEvent is an event that specifies the kernel dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( - const void *Src, sycl::range<3> SrcOffset, void *Dest, - sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, sycl::range<3> HostExtent, - sycl::range<3> CopyExtent, event DepEvent, + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, event DepEvent, + const detail::code_location &CodeLoc = detail::code_location::current()); + + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. Allows for a sub-region copy, where \p SrcOffset, \p DestOffset + /// and \p CopyExtent are used to determine the sub-region. Pixel size is + /// determined by \p SrcImgDesc . An exception is thrown if either \p Src or + /// \p Dest are nullptr. The behavior is undefined if \p SrcRowPitch or + /// \p DestRowPitch are inconsistent with hardware requirements. + /// + /// \param Src is a USM pointer to the source memory. + /// \param SrcOffset is an offset from the source origin measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. + /// \param Dest is a USM pointer to the destination memory. + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param DestRowPitch is the pitch of the rows of the destination memory. + /// \param CopyExtent is the width, height, and depth of the region to copy + /// measured in pixels (pixel size determined by + /// \p SrcImgDesc ) + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing the copy operation. + event ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, event DepEvent, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. An exception is thrown if either \p Src is nullptr, \p - /// Dest is nullptr, or \p Pitch is inconsistent with hardware requirements. - /// The behavior is undefined if \p DeviceImgDesc is inconsistent with the - /// allocated memory region. + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. An exception is thrown if either \p Src or \p Dest are nullptr. + /// The behavior is undefined if \p SrcImgDesc or \p DestImgDesc are + /// inconsistent with their respective allocated memory regions or + /// \p SrcRowPitch or \p DestRowPitch are inconsistent with hardware + /// requirements. /// /// \param Src is a USM pointer to the source memory. + /// \param SrcImgDesc is the source image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the source memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DeviceImgDesc is the image descriptor - /// \param DeviceRowPitch is the pitch of the rows on the device. + /// \param DestImgDesc is the destination image descriptor. + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param DepEvents is a vector of events that specifies the kernel - /// dependencies. + /// dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( - const void *Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, const std::vector &DepEvents, + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); - /// Copies data from one memory region to another, where \p Src and \p Dest - /// are USM pointers. Allows for a sub-region copy, where \p SrcOffset , - /// \p DestOffset , and \p Extent are used to determine the sub-region. - /// An exception is thrown if either \p Src is nullptr or \p Dest is - /// incomplete. + /// Copies data from DtoD or HtoH memory, where \p Src and \p Dest are USM + /// pointers. Allows for a sub-region copy, where \p SrcOffset, \p DestOffset + /// and \p CopyExtent are used to determine the sub-region. Pixel size is + /// determined by \p SrcImgDesc . An exception is thrown if either \p Src or + /// \p Dest are nullptr. The behavior is undefined if \p SrcRowPitch or + /// \p DestRowPitch are inconsistent with hardware requirements. /// /// \param Src is a USM pointer to the source memory. - /// \param SrcOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively + /// \param SrcOffset is an offset from the source origin measured in pixels + /// (pixel size determined by \p SrcImgDesc ) + /// \param SrcImgDesc is the source image descriptor + /// \param SrcRowPitch is the pitch of the rows of the destination memory. /// \param Dest is a USM pointer to the destination memory. - /// \param DestOffset is an destination offset from the origin where the - /// x, y, and z components are measured in bytes, rows, - /// and slices respectively - /// \param DeviceImgDesc is the destination image descriptor - /// \param DeviceRowPitch is the row pitch on the device - /// \param HostExtent is the extent of the host memory to copy, measured in - /// pixels (pixel size determined by \p DeviceImgDesc ) + /// \param DestOffset is an offset from the destination origin measured in + /// pixels (pixel size determined by \p SrcImgDesc ) + /// \param DestImgDesc is the destination image descriptor + /// \param DestRowPitch is the pitch of the rows of the destination memory. /// \param CopyExtent is the width, height, and depth of the region to copy /// measured in pixels (pixel size determined by - /// \p DeviceImgDesc ) + /// \p SrcImgDesc ) /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. /// \return an event representing the copy operation. event ext_oneapi_copy( - const void *Src, sycl::range<3> SrcOffset, void *Dest, - sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, sycl::range<3> HostExtent, - sycl::range<3> CopyExtent, const std::vector &DepEvents, + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent, + const std::vector &DepEvents, const detail::code_location &CodeLoc = detail::code_location::current()); /// Instruct the queue with a non-blocking wait on an external semaphore. diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 3069595d2b6d8..261b97966ae40 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1644,7 +1644,8 @@ void MemoryManager::copy_image_bindless( "Copy image bindless must be called with a valid device queue"); assert((Flags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || - Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE) && + Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || + Flags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) && "Invalid flags passed to copy_image_bindless."); if (!Dst || !Src) throw sycl::exception( diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 19d0fe95706cf..9c410aeb3a2ac 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -49,8 +49,8 @@ bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) { return DGEntry && !DGEntry->MImageIdentifiers.empty(); } -ur_exp_image_copy_flags_t getUrImageCopyFlags(sycl::usm::alloc SrcPtrType, - sycl::usm::alloc DstPtrType) { +static ur_exp_image_copy_flags_t +getUrImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType) { if (DstPtrType == sycl::usm::alloc::device) { // Dest is on device if (SrcPtrType == sycl::usm::alloc::device) @@ -68,8 +68,7 @@ ur_exp_image_copy_flags_t getUrImageCopyFlags(sycl::usm::alloc SrcPtrType, return UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; if (SrcPtrType == sycl::usm::alloc::host || SrcPtrType == sycl::usm::alloc::unknown) - throw sycl::exception(make_error_code(errc::invalid), - "Cannot copy image from host to host"); + return UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST; throw sycl::exception(make_error_code(errc::invalid), "Unknown copy source location"); } @@ -83,6 +82,222 @@ void *getValueFromDynamicParameter( return sycl::detail::getSyclObjImpl(DynamicParamBase)->getValue(); } +// Bindless image helpers + +// Fill image type and return depth or array_size +static unsigned int +fill_image_type(const ext::oneapi::experimental::image_descriptor &Desc, + ur_image_desc_t &UrDesc) { + if (Desc.array_size > 1) { + // Image Array. + UrDesc.type = + Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; + + // Cubemap. + UrDesc.type = + Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap + ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP + : UrDesc.type; + + return Desc.array_size; + } + + UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D + : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D + : UR_MEM_TYPE_IMAGE1D); + return Desc.depth; +} + +// Fill image format +static ur_image_format_t +fill_format(const ext::oneapi::experimental::image_descriptor &Desc) { + ur_image_format_t PiFormat; + + PiFormat.channelType = + sycl::_V1::detail::convertChannelType(Desc.channel_type); + PiFormat.channelOrder = sycl::detail::convertChannelOrder( + sycl::_V1::ext::oneapi::experimental::detail:: + get_image_default_channel_order(Desc.num_channels)); + + return PiFormat; +} + +static void +verify_copy(const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc) { + + if (SrcImgDesc.width != DestImgDesc.width || + SrcImgDesc.height != DestImgDesc.height || + SrcImgDesc.depth != DestImgDesc.depth) { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: The source image and the destination " + "image must have equal dimensions!"); + } + + if (SrcImgDesc.num_channels != DestImgDesc.num_channels) { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: The source image and the destination " + "image must have the same number of channels!"); + } +} + +static void +verify_sub_copy(const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + sycl::range<3> DestOffset, sycl::range<3> CopyExtent) { + + auto isOutOfRange = [](const sycl::range<3> &range, + const sycl::range<3> &offset, + const sycl::range<3> ©Extent) { + sycl::range<3> result = (range > 0UL && ((offset + copyExtent) > range)); + + return (static_cast(result[0]) || static_cast(result[1]) || + static_cast(result[2])); + }; + + sycl::range<3> SrcImageSize = {SrcImgDesc.width, SrcImgDesc.height, + SrcImgDesc.depth}; + sycl::range<3> DestImageSize = {DestImgDesc.width, DestImgDesc.height, + DestImgDesc.depth}; + + if (isOutOfRange(SrcImageSize, SrcOffset, CopyExtent) || + isOutOfRange(DestImageSize, DestOffset, CopyExtent)) { + throw sycl::exception( + make_error_code(errc::invalid), + "Copy Error: Image copy attempted to access out of bounds memory!"); + } + + if (SrcImgDesc.num_channels != DestImgDesc.num_channels) { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: The source image and the destination " + "image must have the same number of channels!"); + } +} + +static ur_image_desc_t +fill_image_desc(const ext::oneapi::experimental::image_descriptor &ImgDesc) { + ur_image_desc_t UrDesc = {}; + UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; + UrDesc.width = ImgDesc.width; + UrDesc.height = ImgDesc.height; + UrDesc.depth = ImgDesc.depth; + UrDesc.arraySize = ImgDesc.array_size; + return UrDesc; +} + +static void +fill_copy_args(std::shared_ptr impl, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + ur_exp_image_copy_flags_t ImageCopyFlags, size_t SrcPitch, + size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, + sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { + SrcImgDesc.verify(); + DestImgDesc.verify(); + + // CopyExtent.size() should only be greater than 0 when sub-copy is occurring. + if (CopyExtent.size() == 0) { + detail::verify_copy(SrcImgDesc, DestImgDesc); + } else { + detail::verify_sub_copy(SrcImgDesc, SrcOffset, DestImgDesc, DestOffset, + CopyExtent); + } + + ur_image_desc_t UrSrcDesc = detail::fill_image_desc(SrcImgDesc); + ur_image_desc_t UrDestDesc = detail::fill_image_desc(DestImgDesc); + ur_image_format_t UrSrcFormat = detail::fill_format(SrcImgDesc); + ur_image_format_t UrDestFormat = detail::fill_format(DestImgDesc); + auto ZCopyExtentComponent = detail::fill_image_type(SrcImgDesc, UrSrcDesc); + detail::fill_image_type(DestImgDesc, UrDestDesc); + + impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; + impl->MSrcImageDesc = UrSrcDesc; + impl->MDstImageDesc = UrDestDesc; + impl->MSrcImageFormat = UrSrcFormat; + impl->MDstImageFormat = UrDestFormat; + impl->MImageCopyFlags = ImageCopyFlags; + + if (CopyExtent.size() != 0) { + impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; + } else { + impl->MCopyExtent = {SrcImgDesc.width, SrcImgDesc.height, + ZCopyExtentComponent}; + } + + if (SrcExtent.size() != 0) { + impl->MSrcImageDesc.width = SrcExtent[0]; + impl->MSrcImageDesc.height = SrcExtent[1]; + impl->MSrcImageDesc.depth = SrcExtent[2]; + } + + if (DestExtent.size() != 0) { + impl->MDstImageDesc.width = DestExtent[0]; + impl->MDstImageDesc.height = DestExtent[1]; + impl->MDstImageDesc.depth = DestExtent[2]; + } + + if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { + impl->MSrcImageDesc.rowPitch = 0; + impl->MDstImageDesc.rowPitch = DestPitch; + } else if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + impl->MSrcImageDesc.rowPitch = SrcPitch; + impl->MDstImageDesc.rowPitch = 0; + } else { + impl->MSrcImageDesc.rowPitch = SrcPitch; + impl->MDstImageDesc.rowPitch = DestPitch; + } +} + +static void +fill_copy_args(std::shared_ptr impl, + const ext::oneapi::experimental::image_descriptor &Desc, + ur_exp_image_copy_flags_t ImageCopyFlags, + sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, + sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { + + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, 0 /*SrcPitch*/, + 0 /*DestPitch*/, SrcOffset, SrcExtent, DestOffset, DestExtent, + CopyExtent); +} + +static void +fill_copy_args(std::shared_ptr impl, + const ext::oneapi::experimental::image_descriptor &Desc, + ur_exp_image_copy_flags_t ImageCopyFlags, size_t SrcPitch, + size_t DestPitch, sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, + sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { + + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, SrcPitch, DestPitch, + SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); +} + +static void +fill_copy_args(std::shared_ptr impl, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + ur_exp_image_copy_flags_t ImageCopyFlags, + sycl::range<3> SrcOffset = {0, 0, 0}, + sycl::range<3> SrcExtent = {0, 0, 0}, + sycl::range<3> DestOffset = {0, 0, 0}, + sycl::range<3> DestExtent = {0, 0, 0}, + sycl::range<3> CopyExtent = {0, 0, 0}) { + + fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, 0 /*SrcPitch*/, + 0 /*DestPitch*/, SrcOffset, SrcExtent, DestOffset, DestExtent, + CopyExtent); +} + } // namespace detail handler::handler(std::shared_ptr Queue, @@ -1026,62 +1241,23 @@ void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, setType(detail::CGType::Memset2DUSM); } +// Simple host to device copy void handler::ext_oneapi_copy( const void *Src, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &Desc) { + const ext::oneapi::experimental::image_descriptor &DestImgDesc) { throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - Desc.verify(); - MSrcPtr = const_cast(Src); MDstPtr = reinterpret_cast(Dest.raw_handle); - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = Desc.width; - UrDesc.height = Desc.height; - UrDesc.depth = Desc.depth; - UrDesc.arraySize = Desc.array_size; - - if (Desc.array_size > 1) { - // Image Array. - UrDesc.type = - Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; - - // Cubemap. - UrDesc.type = - Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; - - // Array size is depth extent. - impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size}; - } else { - UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); - - impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - } + detail::fill_copy_args(impl, DestImgDesc, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE); - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(Desc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(Desc.num_channels)); - - impl->MSrcOffset = {0, 0, 0}; - impl->MDestOffset = {0, 0, 0}; - impl->MSrcImageDesc = UrDesc; - impl->MDstImageDesc = UrDesc; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; setType(detail::CGType::CopyImage); } +// Host to device copy with offsets and extent void handler::ext_oneapi_copy( const void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, @@ -1090,170 +1266,134 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - DestImgDesc.verify(); - MSrcPtr = const_cast(Src); MDstPtr = reinterpret_cast(Dest.raw_handle); - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = DestImgDesc.width; - UrDesc.height = DestImgDesc.height; - UrDesc.depth = DestImgDesc.depth; - UrDesc.arraySize = DestImgDesc.array_size; + detail::fill_copy_args(impl, DestImgDesc, + UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE, SrcOffset, + SrcExtent, DestOffset, {0, 0, 0}, CopyExtent); - if (DestImgDesc.array_size > 1) { - // Image Array. - UrDesc.type = DestImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY - : UR_MEM_TYPE_IMAGE1D_ARRAY; + setType(detail::CGType::CopyImage); +} - // Cubemap. - UrDesc.type = - DestImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; - } else { - UrDesc.type = DestImgDesc.depth > 0 - ? UR_MEM_TYPE_IMAGE3D - : (DestImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); - } +// Simple device to host copy +void handler::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, void *Dest, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); + MSrcPtr = reinterpret_cast(Src.raw_handle); + MDstPtr = Dest; - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(DestImgDesc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(DestImgDesc.num_channels)); + detail::fill_copy_args(impl, SrcImgDesc, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST); - impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageDesc = UrDesc; - impl->MSrcImageDesc.width = SrcExtent[0]; - impl->MSrcImageDesc.height = SrcExtent[1]; - impl->MSrcImageDesc.depth = SrcExtent[2]; - impl->MDstImageDesc = UrDesc; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE; setType(detail::CGType::CopyImage); } +// Device to host copy with offsets and extent void handler::ext_oneapi_copy( - const ext::oneapi::experimental::image_mem_handle Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &Desc) { + const ext::oneapi::experimental::image_mem_handle Src, + sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + sycl::range<3> DestOffset, sycl::range<3> DestExtent, + sycl::range<3> CopyExtent) { throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - Desc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = Desc.width; - UrDesc.height = Desc.height; - UrDesc.depth = Desc.depth; - UrDesc.arraySize = Desc.array_size; + detail::fill_copy_args(impl, SrcImgDesc, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST, SrcOffset, + {0, 0, 0}, DestOffset, DestExtent, CopyExtent); - if (Desc.array_size > 1) { - // Image Array. - UrDesc.type = - Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; + setType(detail::CGType::CopyImage); +} - // Cubemap. - UrDesc.type = - Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; +// Simple HtoD or DtoH copy with USM device memory +void handler::ext_oneapi_copy( + const void *Src, void *Dest, + const ext::oneapi::experimental::image_descriptor &Desc, + size_t DeviceRowPitch) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); + MSrcPtr = const_cast(Src); + MDstPtr = Dest; - // Array size is depth extent. - impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size}; - } else { - UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); + ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( + get_pointer_type(Src, MQueue->get_context()), + get_pointer_type(Dest, MQueue->get_context())); - impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || + ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + detail::fill_copy_args(impl, Desc, ImageCopyFlags, DeviceRowPitch, + DeviceRowPitch); + } else { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: This copy function only performs host " + "to device or device to host copies!"); } - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(Desc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(Desc.num_channels)); + setType(detail::CGType::CopyImage); +} + +// HtoD or DtoH copy with USM device memory, using offsets, extent +void handler::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, void *Dest, + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, + size_t DeviceRowPitch, sycl::range<3> HostExtent, + sycl::range<3> CopyExtent) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); + MSrcPtr = const_cast(Src); + MDstPtr = Dest; + + ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( + get_pointer_type(Src, MQueue->get_context()), + get_pointer_type(Dest, MQueue->get_context())); + + // Fill the host extent based on the type of copy. + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { + detail::fill_copy_args(impl, DeviceImgDesc, ImageCopyFlags, DeviceRowPitch, + DeviceRowPitch, SrcOffset, HostExtent, DestOffset, + {0, 0, 0}, CopyExtent); + } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + detail::fill_copy_args(impl, DeviceImgDesc, ImageCopyFlags, DeviceRowPitch, + DeviceRowPitch, SrcOffset, {0, 0, 0}, DestOffset, + HostExtent, CopyExtent); + } else { + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: This copy function only performs host " + "to device or device to host copies!"); + } - impl->MSrcOffset = {0, 0, 0}; - impl->MDestOffset = {0, 0, 0}; - impl->MSrcImageDesc = UrDesc; - impl->MDstImageDesc = UrDesc; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; setType(detail::CGType::CopyImage); } +// Simple device to device copy void handler::ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, ext::oneapi::experimental::image_mem_handle Dest, - const ext::oneapi::experimental::image_descriptor &ImageDesc) { + const ext::oneapi::experimental::image_descriptor &DestImgDesc) { throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - ImageDesc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = reinterpret_cast(Dest.raw_handle); - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = ImageDesc.width; - UrDesc.height = ImageDesc.height; - UrDesc.depth = ImageDesc.depth; - UrDesc.arraySize = ImageDesc.array_size; - if (ImageDesc.array_size > 1) { - // Image Array. - UrDesc.type = ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY - : UR_MEM_TYPE_IMAGE1D_ARRAY; - - // Cubemap. - UrDesc.type = - ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; - - // Array size is depth extent. - impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, - ImageDesc.array_size}; - } else { - UrDesc.type = ImageDesc.depth > 0 - ? UR_MEM_TYPE_IMAGE3D - : (ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); - - impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth}; - } + detail::fill_copy_args(impl, SrcImgDesc, DestImgDesc, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE); - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(ImageDesc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(ImageDesc.num_channels)); - - impl->MSrcOffset = {0, 0, 0}; - impl->MDestOffset = {0, 0, 0}; - impl->MSrcImageDesc = UrDesc; - impl->MDstImageDesc = UrDesc; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; setType(detail::CGType::CopyImage); } +// Device to device copy with offsets and extent void handler::ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset, @@ -1264,286 +1404,149 @@ void handler::ext_oneapi_copy( throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - SrcImgDesc.verify(); - DestImgDesc.verify(); - - auto isOutOfRange = [](const sycl::range<3> &range, - const sycl::range<3> &offset, - const sycl::range<3> ©Extent) { - sycl::range<3> result = (range > 0UL && ((offset + copyExtent) > range)); - - return (static_cast(result[0]) || static_cast(result[1]) || - static_cast(result[2])); - }; - - sycl::range<3> SrcImageSize = {SrcImgDesc.width, SrcImgDesc.height, - SrcImgDesc.depth}; - sycl::range<3> DestImageSize = {DestImgDesc.width, DestImgDesc.height, - DestImgDesc.depth}; - - if (isOutOfRange(SrcImageSize, SrcOffset, CopyExtent) || - isOutOfRange(DestImageSize, DestOffset, CopyExtent)) { - throw sycl::exception( - make_error_code(errc::invalid), - "Image copy attempted to access out of bounds memory!"); - } - MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = reinterpret_cast(Dest.raw_handle); - ur_image_desc_t UrSrcDesc = {}; - UrSrcDesc.width = SrcImgDesc.width; - UrSrcDesc.height = SrcImgDesc.height; - UrSrcDesc.depth = SrcImgDesc.depth; - UrSrcDesc.arraySize = SrcImgDesc.array_size; - - ur_image_desc_t UrDestDesc = {}; - UrDestDesc.width = DestImgDesc.width; - UrDestDesc.height = DestImgDesc.height; - UrDestDesc.depth = DestImgDesc.depth; - UrDestDesc.arraySize = DestImgDesc.array_size; - - auto fill_image_type = - [](const ext::oneapi::experimental::image_descriptor &Desc, - ur_image_desc_t &UrDesc) { - if (Desc.array_size > 1) { - // Image Array. - UrDesc.type = Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY - : UR_MEM_TYPE_IMAGE1D_ARRAY; - - // Cubemap. - UrDesc.type = - Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; - } else { - UrDesc.type = Desc.depth > 0 - ? UR_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); - } - }; + detail::fill_copy_args(impl, SrcImgDesc, DestImgDesc, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcOffset, + {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); - fill_image_type(SrcImgDesc, UrSrcDesc); - fill_image_type(DestImgDesc, UrDestDesc); - - auto fill_format = [](const ext::oneapi::experimental::image_descriptor &Desc, - ur_image_format_t &UrFormat) { - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(Desc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(Desc.num_channels)); - }; + setType(detail::CGType::CopyImage); +} - ur_image_format_t UrSrcFormat; - ur_image_format_t UrDestFormat; +// device to device image_mem_handle to usm +void handler::ext_oneapi_copy( + const ext::oneapi::experimental::image_mem_handle Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); + MSrcPtr = reinterpret_cast(Src.raw_handle); + MDstPtr = Dest; - fill_format(SrcImgDesc, UrSrcFormat); - fill_format(DestImgDesc, UrDestFormat); + detail::fill_copy_args(impl, SrcImgDesc, DestImgDesc, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, 0, + DestRowPitch); - impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageDesc = UrSrcDesc; - impl->MDstImageDesc = UrDestDesc; - impl->MSrcImageFormat = UrSrcFormat; - impl->MDstImageFormat = UrDestFormat; - impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE; setType(detail::CGType::CopyImage); } +// device to device image_mem_handle to usm sub copy void handler::ext_oneapi_copy( const ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, - sycl::range<3> DestOffset, sycl::range<3> DestExtent, - sycl::range<3> CopyExtent) { + sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent) { throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - SrcImgDesc.verify(); - MSrcPtr = reinterpret_cast(Src.raw_handle); MDstPtr = Dest; - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = SrcImgDesc.width; - UrDesc.height = SrcImgDesc.height; - UrDesc.depth = SrcImgDesc.depth; - UrDesc.arraySize = SrcImgDesc.array_size; - - if (SrcImgDesc.array_size > 1) { - // Image Array. - UrDesc.type = SrcImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY - : UR_MEM_TYPE_IMAGE1D_ARRAY; - - // Cubemap. - UrDesc.type = - SrcImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; - } else { - UrDesc.type = SrcImgDesc.depth > 0 - ? UR_MEM_TYPE_IMAGE3D - : (SrcImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); - } - - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(SrcImgDesc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(SrcImgDesc.num_channels)); + detail::fill_copy_args( + impl, SrcImgDesc, DestImgDesc, UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, 0, + DestRowPitch, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); - impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageDesc = UrDesc; - impl->MDstImageDesc = UrDesc; - impl->MDstImageDesc.width = DestExtent[0]; - impl->MDstImageDesc.height = DestExtent[1]; - impl->MDstImageDesc.depth = DestExtent[2]; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MImageCopyFlags = UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST; setType(detail::CGType::CopyImage); } +// device to device usm to image_mem_handle void handler::ext_oneapi_copy( - const void *Src, void *Dest, - const ext::oneapi::experimental::image_descriptor &Desc, size_t Pitch) { + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc) { throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - Desc.verify(); - MSrcPtr = const_cast(Src); - MDstPtr = Dest; - - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = Desc.width; - UrDesc.height = Desc.height; - UrDesc.depth = Desc.depth; - UrDesc.arraySize = Desc.array_size; - - if (Desc.array_size > 1) { - // Image Array. - UrDesc.type = - Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY : UR_MEM_TYPE_IMAGE1D_ARRAY; - - // Cubemap. - UrDesc.type = - Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; - - // Array size is depth extent. - impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size}; - } else { - UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D - : (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); - - impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth}; - } + MDstPtr = reinterpret_cast(Dest.raw_handle); - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(Desc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(Desc.num_channels)); + detail::fill_copy_args(impl, SrcImgDesc, DestImgDesc, + UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, SrcRowPitch, + 0); - impl->MSrcOffset = {0, 0, 0}; - impl->MDestOffset = {0, 0, 0}; - impl->MSrcImageDesc = UrDesc; - impl->MDstImageDesc = UrDesc; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MSrcImageDesc.rowPitch = Pitch; - impl->MDstImageDesc.rowPitch = Pitch; - impl->MImageCopyFlags = detail::getUrImageCopyFlags( - get_pointer_type(Src, MQueue->get_context()), - get_pointer_type(Dest, MQueue->get_context())); setType(detail::CGType::CopyImage); } +// device to device usm to image_mem_handle sub copy void handler::ext_oneapi_copy( - const void *Src, sycl::range<3> SrcOffset, void *Dest, + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, - const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, - size_t DeviceRowPitch, sycl::range<3> HostExtent, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent) { throwIfGraphAssociated< ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: sycl_ext_oneapi_bindless_images>(); - DeviceImgDesc.verify(); + MSrcPtr = const_cast(Src); + MDstPtr = reinterpret_cast(Dest.raw_handle); + + detail::fill_copy_args( + impl, SrcImgDesc, DestImgDesc, UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE, + SrcRowPitch, 0, SrcOffset, {0, 0, 0}, DestOffset, {0, 0, 0}, CopyExtent); + + setType(detail::CGType::CopyImage); +} +// Simple DtoD or HtoH USM to USM copy +void handler::ext_oneapi_copy( + const void *Src, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = const_cast(Src); MDstPtr = Dest; - ur_image_desc_t UrDesc = {}; - UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC; - UrDesc.width = DeviceImgDesc.width; - UrDesc.height = DeviceImgDesc.height; - UrDesc.depth = DeviceImgDesc.depth; - UrDesc.arraySize = DeviceImgDesc.array_size; - - if (DeviceImgDesc.array_size > 1) { - // Image Array. - UrDesc.type = DeviceImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D_ARRAY - : UR_MEM_TYPE_IMAGE1D_ARRAY; + ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( + get_pointer_type(Src, MQueue->get_context()), + get_pointer_type(Dest, MQueue->get_context())); - // Cubemap. - UrDesc.type = DeviceImgDesc.type == - sycl::ext::oneapi::experimental::image_type::cubemap - ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : UrDesc.type; + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || + ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { + detail::fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, + SrcRowPitch, DestRowPitch); } else { - UrDesc.type = DeviceImgDesc.depth > 0 - ? UR_MEM_TYPE_IMAGE3D - : (DeviceImgDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D - : UR_MEM_TYPE_IMAGE1D); + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: This copy function only performs device " + "to device or host to host copies!"); } - ur_image_format_t UrFormat; - UrFormat.channelType = - sycl::_V1::detail::convertChannelType(DeviceImgDesc.channel_type); - UrFormat.channelOrder = sycl::detail::convertChannelOrder( - sycl::_V1::ext::oneapi::experimental::detail:: - get_image_default_channel_order(DeviceImgDesc.num_channels)); + setType(detail::CGType::CopyImage); +} - impl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]}; - impl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]}; - impl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]}; - impl->MSrcImageFormat = UrFormat; - impl->MDstImageFormat = UrFormat; - impl->MImageCopyFlags = detail::getUrImageCopyFlags( +// DtoD or HtoH USM to USM copy with offsets and extent +void handler::ext_oneapi_copy( + const void *Src, sycl::range<3> SrcOffset, + const ext::oneapi::experimental::image_descriptor &SrcImgDesc, + size_t SrcRowPitch, void *Dest, sycl::range<3> DestOffset, + const ext::oneapi::experimental::image_descriptor &DestImgDesc, + size_t DestRowPitch, sycl::range<3> CopyExtent) { + MSrcPtr = const_cast(Src); + MDstPtr = Dest; + + ur_exp_image_copy_flags_t ImageCopyFlags = detail::getUrImageCopyFlags( get_pointer_type(Src, MQueue->get_context()), get_pointer_type(Dest, MQueue->get_context())); - impl->MSrcImageDesc = UrDesc; - impl->MDstImageDesc = UrDesc; - // Fill the descriptor row pitch and host extent based on the type of copy. - if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - impl->MDstImageDesc.rowPitch = DeviceRowPitch; - impl->MSrcImageDesc.rowPitch = 0; - impl->MSrcImageDesc.width = HostExtent[0]; - impl->MSrcImageDesc.height = HostExtent[1]; - impl->MSrcImageDesc.depth = HostExtent[2]; - } else if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - impl->MSrcImageDesc.rowPitch = DeviceRowPitch; - impl->MDstImageDesc.rowPitch = 0; - impl->MDstImageDesc.width = HostExtent[0]; - impl->MDstImageDesc.height = HostExtent[1]; - impl->MDstImageDesc.depth = HostExtent[2]; + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || + ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST) { + detail::fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, + SrcRowPitch, DestRowPitch, SrcOffset, {0, 0, 0}, + DestOffset, {0, 0, 0}, CopyExtent); } else { - impl->MDstImageDesc.rowPitch = DeviceRowPitch; - impl->MSrcImageDesc.rowPitch = DeviceRowPitch; + throw sycl::exception(make_error_code(errc::invalid), + "Copy Error: This copy function only performs device " + "to device or host to host copies!"); } setType(detail::CGType::CopyImage); diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp new file mode 100644 index 0000000000000..08de84c55a795 --- /dev/null +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp @@ -0,0 +1,289 @@ +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +template +void copy_image_mem_handle_to_image_mem_handle( + syclexp::image_descriptor &descSrc, syclexp::image_descriptor &descDest, + const std::vector &testData, sycl::device dev, sycl::queue q, + std::vector &out) { + syclexp::image_mem_handle imgMemSrc = syclexp::alloc_image_mem(descSrc, q); + syclexp::image_mem_handle imgMemDst = syclexp::alloc_image_mem(descDest, q); + + q.ext_oneapi_copy(testData.data(), imgMemSrc, descSrc); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemSrc, descSrc, imgMemDst, descDest); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemDst, out.data(), descDest); + q.wait_and_throw(); + + syclexp::free_image_mem(imgMemSrc, syclexp::image_type::standard, q); + syclexp::free_image_mem(imgMemDst, syclexp::image_type::standard, q); +} + +template +void copy_image_mem_handle_to_usm(syclexp::image_descriptor &descSrc, + syclexp::image_descriptor &descDest, + const std::vector &testData, + sycl::device dev, sycl::queue q, + std::vector &out) { + syclexp::image_mem imgMemSrc(descSrc, dev, q.get_context()); + + size_t pitch = 0; + void *imgMemDst = nullptr; + if (NDims == 1) { + size_t elements = descDest.width * descDest.num_channels; + imgMemDst = sycl::malloc_device(elements, q); + } else { + imgMemDst = syclexp::pitched_alloc_device(&pitch, descDest, q); + } + + q.ext_oneapi_copy(testData.data(), imgMemSrc.get_handle(), descSrc); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), descSrc, imgMemDst, descDest, + pitch); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemDst, out.data(), descDest, pitch); + q.wait_and_throw(); + + sycl::free(imgMemDst, q.get_context()); +} + +template +void copy_usm_to_image_mem_handle(syclexp::image_descriptor &descSrc, + syclexp::image_descriptor &descDest, + const std::vector &testData, + sycl::device dev, sycl::queue q, + std::vector &out) { + size_t pitch = 0; + void *imgMemSrc = nullptr; + if (NDims == 1) { + size_t elements = descSrc.width * descSrc.num_channels; + imgMemSrc = sycl::malloc_device(elements, q); + } else { + imgMemSrc = syclexp::pitched_alloc_device(&pitch, descSrc, q); + } + + syclexp::image_mem imgMemDst(descDest, dev, q.get_context()); + + q.ext_oneapi_copy(testData.data(), imgMemSrc, descSrc, pitch); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemSrc, descSrc, pitch, imgMemDst.get_handle(), + descDest); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), descDest); + q.wait_and_throw(); + + sycl::free(imgMemSrc, q.get_context()); +} + +template +void copy_usm_to_usm(syclexp::image_descriptor &descSrc, + syclexp::image_descriptor &descDest, + const std::vector &testData, sycl::device dev, + sycl::queue q, std::vector &out) { + + size_t pitchSrc = 0; + void *imgMemSrc = nullptr; + if (NDims == 1) { + size_t elements = descSrc.width * descSrc.num_channels; + imgMemSrc = sycl::malloc_device(elements, q); + } else { + imgMemSrc = syclexp::pitched_alloc_device(&pitchSrc, descSrc, q); + } + + size_t pitchDest = 0; + void *imgMemDst = nullptr; + if (NDims == 1) { + size_t elements = descDest.width * descDest.num_channels; + imgMemDst = sycl::malloc_device(elements, q); + } else { + imgMemDst = syclexp::pitched_alloc_device(&pitchDest, descDest, q); + } + + q.ext_oneapi_copy(testData.data(), imgMemSrc, descSrc, pitchSrc); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemSrc, descSrc, pitchSrc, imgMemDst, descDest, + pitchDest); + q.wait_and_throw(); + + q.ext_oneapi_copy(imgMemDst, out.data(), descDest, pitchDest); + q.wait_and_throw(); + + sycl::free(imgMemSrc, q.get_context()); + sycl::free(imgMemDst, q.get_context()); +} + +template +bool check_test(const std::vector &out, + const std::vector &expected) { + assert(out.size() == expected.size()); + bool validated = true; + for (int i = 0; i < out.size(); i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + return validated; +} + +template +bool run_copy_test_with(sycl::device &dev, sycl::queue &q, + sycl::range dims) { + static_assert(sizeof(dataTypeSrc) == sizeof(dataTypeDest), + "Image data type sizes aren't equal"); + std::vector dataSequence(dims.size()); + std::vector out(dims.size()); + std::vector expected(dims.size()); + + std::iota(dataSequence.begin(), dataSequence.end(), 0); + + if constexpr (std::is_same_v) { + std::iota(expected.begin(), expected.end(), 0); + } else { + std::memcpy(expected.data(), dataSequence.data(), + dims.size() * sizeof(dataTypeDest)); + } + + syclexp::image_descriptor descSrc; + syclexp::image_descriptor descDest; + + if constexpr (imageType == syclexp::image_type::standard) { + descSrc = syclexp::image_descriptor(dims, channelNumSrc, channelTypeSrc); + descDest = syclexp::image_descriptor(dims, channelNumDest, channelTypeDest); + } else { + descSrc = syclexp::image_descriptor( + {dims[0], NDims > 2 ? dims[1] : 0}, channelNumSrc, channelTypeSrc, + syclexp::image_type::array, 1, NDims > 2 ? dims[2] : dims[1]); + descDest = syclexp::image_descriptor( + {dims[0], NDims > 2 ? dims[1] : 0}, channelNumDest, channelTypeDest, + syclexp::image_type::array, 1, NDims > 2 ? dims[2] : dims[1]); + } + + bool verified = true; + + copy_image_mem_handle_to_image_mem_handle( + descSrc, descDest, dataSequence, dev, q, out); + verified = verified && check_test(out, expected); + + // 3D USM image memory and image arrays backed by USM are not supported + if (NDims != 3 && imageType != syclexp::image_type::array) { + copy_image_mem_handle_to_usm( + descSrc, descDest, dataSequence, dev, q, out); + verified = verified && check_test(out, expected); + + copy_usm_to_image_mem_handle( + descSrc, descDest, dataSequence, dev, q, out); + verified = verified && check_test(out, expected); + + copy_usm_to_usm( + descSrc, descDest, dataSequence, dev, q, out); + verified = verified && check_test(out, expected); + } + + return verified; +} + +template +bool run_copy_test_with(sycl::device &dev, sycl::queue &q, + sycl::range dims) { + return run_copy_test_with(dev, q, dims); +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + + // Standard images copies + bool validated = + run_copy_test_with<1, sycl::image_channel_type::fp32, float, 2>( + dev, q, {128, 128}); + + validated &= run_copy_test_with<1, sycl::image_channel_type::fp32, float, 1>( + dev, q, {128 * 4}); + + validated &= run_copy_test_with<1, sycl::image_channel_type::fp32, float, 3>( + dev, q, {128, 128, 4}); + + // Standard image copies using different data types + validated &= + run_copy_test_with<1, sycl::image_channel_type::fp32, float, 1, + sycl::image_channel_type::signed_int32, int, 2>( + dev, q, {128, 128}); + + if (dev.has(sycl::aspect::fp16)) { + validated &= + run_copy_test_with<1, sycl::image_channel_type::fp16, sycl::half, 1, + sycl::image_channel_type::signed_int16, short, 2>( + dev, q, {128, 128}); + } + + // Layered images copies + validated &= + run_copy_test_with<1, sycl::image_channel_type::fp32, float, 2, + syclexp::image_type::array>(dev, q, {956, 38}); + + validated &= + run_copy_test_with<1, sycl::image_channel_type::fp32, float, 3, + syclexp::image_type::array>(dev, q, {128, 128, 4}); + + // Layered image copies using different data types + validated &= + run_copy_test_with<1, sycl::image_channel_type::fp32, float, 1, + sycl::image_channel_type::signed_int32, int, 2, + syclexp::image_type::array>(dev, q, {956, 38}); + + validated &= + run_copy_test_with<1, sycl::image_channel_type::unsigned_int32, + unsigned int, 1, sycl::image_channel_type::fp32, float, + 2, syclexp::image_type::array>(dev, q, {956, 38}); + + if (!validated) { + std::cout << "Tests failed\n"; + return 1; + } + + std::cout << "Tests passed\n"; + + return 0; +} diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp new file mode 100644 index 0000000000000..357d67d78de9b --- /dev/null +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp @@ -0,0 +1,489 @@ +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +void copy_image_mem_handle_to_image_mem_handle( + const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, const std::vector &dataIn1, + const std::vector &dataIn2, sycl::device dev, sycl::queue q, + std::vector &out) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + syclexp::image_mem imgMemSrc1(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemSrc2(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1.get_handle(), dataInDesc); + q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2.get_handle(), dataInDesc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + + // Copy first half of imgMemSrcOne to first quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, + imgMemDst.get_handle(), {0, 0, 0}, outDesc, copyExtent); + + // Copy second half of imgMemSrcOne to second quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst.get_handle(), + {outDesc.width / 4, 0, 0}, outDesc, copyExtent); + + // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, + imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, + copyExtent); + + // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst.get_handle(), + {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), outDesc); + + q.wait_and_throw(); +} + +void copy_image_mem_handle_to_usm(const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, + const std::vector &dataIn1, + const std::vector &dataIn2, + sycl::device dev, sycl::queue q, + std::vector &out) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + syclexp::image_mem imgMemSrc1(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemSrc2(dataInDesc, dev, q.get_context()); + + // Allocate 1D device USM memory. Pitch set to zero as it is a 1D image + size_t pitch = 0; + size_t elements = outDesc.width * outDesc.num_channels; + void *imgMemDst = sycl::malloc_device(elements, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1.get_handle(), dataInDesc); + q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2.get_handle(), dataInDesc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + + // Copy first half of imgMemSrcOne to first quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, imgMemDst, + {0, 0, 0}, outDesc, pitch, copyExtent); + + // Copy second half of imgMemSrcOne to second quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst, {outDesc.width / 4, 0, 0}, outDesc, + pitch, copyExtent); + + // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, imgMemDst, + {outDesc.width / 2, 0, 0}, outDesc, pitch, copyExtent); + + // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst, {(outDesc.width / 4) * 3, 0, 0}, + outDesc, pitch, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst, out.data(), outDesc, pitch); + + q.wait_and_throw(); + + sycl::free(imgMemDst, q); +} + +void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, + const std::vector &dataIn1, + const std::vector &dataIn2, + sycl::device dev, sycl::queue q, + std::vector &out) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + size_t pitchSrc1 = 0; + size_t pitchSrc2 = 0; + size_t elements = outDesc.width * outDesc.num_channels; + void *imgMemSrc1 = sycl::malloc_device(elements, q); + void *imgMemSrc2 = sycl::malloc_device(elements, q); + + syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1, dataInDesc, pitchSrc1); + q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2, dataInDesc, pitchSrc2); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + + // Copy first half of imgMemSrcOne to first quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1, {0, 0, 0}, dataInDesc, pitchSrc1, + imgMemDst.get_handle(), {0, 0, 0}, outDesc, copyExtent); + + // Copy second half of imgMemSrcOne to second quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1, {dataInDesc.width / 2, 0, 0}, dataInDesc, + pitchSrc1, imgMemDst.get_handle(), + {outDesc.width / 4, 0, 0}, outDesc, copyExtent); + + // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2, {0, 0, 0}, dataInDesc, pitchSrc2, + imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, + copyExtent); + + // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2, {dataInDesc.width / 2, 0, 0}, dataInDesc, + pitchSrc2, imgMemDst.get_handle(), + {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), outDesc); + + q.wait_and_throw(); + + sycl::free(imgMemSrc1, q); + sycl::free(imgMemSrc2, q); +} + +void copy_usm_to_usm(const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, + const std::vector &dataIn1, + const std::vector &dataIn2, sycl::device dev, + sycl::queue q, std::vector &out) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + size_t pitchSrc1 = 0; + size_t pitchSrc2 = 0; + size_t elementsSrc = outDesc.width * outDesc.num_channels; + void *imgMemSrc1 = sycl::malloc_device(elementsSrc, q); + void *imgMemSrc2 = sycl::malloc_device(elementsSrc, q); + + // syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + size_t pitchDst = 0; + size_t elementsDst = outDesc.width * outDesc.num_channels; + void *imgMemDst = sycl::malloc_device(elementsDst, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1, dataInDesc, pitchSrc1); + q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2, dataInDesc, pitchSrc2); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + + // Copy first half of imgMemSrcOne to first quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1, {0, 0, 0}, dataInDesc, pitchSrc1, imgMemDst, + {0, 0, 0}, outDesc, pitchDst, copyExtent); + + // Copy second half of imgMemSrcOne to second quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc1, {dataInDesc.width / 2, 0, 0}, dataInDesc, + pitchSrc1, imgMemDst, {outDesc.width / 4, 0, 0}, outDesc, + pitchDst, copyExtent); + + // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2, {0, 0, 0}, dataInDesc, pitchSrc2, imgMemDst, + {outDesc.width / 2, 0, 0}, outDesc, pitchDst, copyExtent); + + // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + q.ext_oneapi_copy(imgMemSrc2, {dataInDesc.width / 2, 0, 0}, dataInDesc, + pitchSrc2, imgMemDst, {(outDesc.width / 4) * 3, 0, 0}, + outDesc, pitchDst, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst, out.data(), outDesc, pitchDst); + + q.wait_and_throw(); + + sycl::free(imgMemSrc1, q); + sycl::free(imgMemSrc2, q); + sycl::free(imgMemDst, q); +} + +bool image_mem_handle_to_image_mem_handle_out_of_bounds_copy( + const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + syclexp::image_mem imgMemSrc(dataInDesc, dev, q.get_context()); + syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), dataInDesc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {(dataInDesc.width / 2) + 1, 1, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy(imgMemSrc.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst.get_handle(), + {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); + } catch (sycl::exception e) { + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return false; + } + + return false; +} + +bool image_mem_handle_to_usm_out_of_bounds_copy( + const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + syclexp::image_mem imgMemSrc(dataInDesc, dev, q.get_context()); + + size_t pitch = 0; + size_t elements = outDesc.width * outDesc.num_channels; + void *imgMemDst = sycl::malloc_device(elements, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), dataInDesc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {(dataInDesc.width / 2) + 1, 1, 1}; + + try { + // Perform out of bounds copy! + q.ext_oneapi_copy(imgMemSrc.get_handle(), {dataInDesc.width / 2, 0, 0}, + dataInDesc, imgMemDst, {(outDesc.width / 4) * 3, 0, 0}, + outDesc, pitch, copyExtent); + } catch (sycl::exception e) { + sycl::free(imgMemDst, q); + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + } + + sycl::free(imgMemDst, q); + return false; +} + +bool usm_to_image_mem_handle_out_of_bounds_copy( + const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + size_t pitch = 0; + size_t elements = dataInDesc.width * dataInDesc.num_channels; + void *imgMemSrc = sycl::malloc_device(elements, q); + + syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc, dataInDesc, pitch); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {(dataInDesc.width / 2) + 1, 1, 1}; + + try { + // Perform out of bounds copy! + q.ext_oneapi_copy(imgMemSrc, {dataInDesc.width / 2, 0, 0}, dataInDesc, + pitch, imgMemDst.get_handle(), + {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); + } catch (sycl::exception e) { + sycl::free(imgMemSrc, q); + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + } + + sycl::free(imgMemSrc, q); + return false; +} + +bool usm_to_usm_out_of_bounds_copy(const syclexp::image_descriptor &dataInDesc, + const syclexp::image_descriptor &outDesc, + const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + + // Check that output image is double size of input images + assert(outDesc.width == dataInDesc.width * 2); + + size_t pitchSrc = 0; + size_t elementsSrc = dataInDesc.width * dataInDesc.num_channels; + void *imgMemSrc = sycl::malloc_device(elementsSrc, q); + + size_t pitchDst = 0; + size_t elementsDst = dataInDesc.width * dataInDesc.num_channels; + void *imgMemDst = sycl::malloc_device(elementsDst, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc, dataInDesc, pitchSrc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {(dataInDesc.width / 2) + 1, 1, 1}; + + try { + // Perform out of bounds copy! + q.ext_oneapi_copy(imgMemSrc, {dataInDesc.width / 2, 0, 0}, dataInDesc, + pitchSrc, imgMemDst, {(outDesc.width / 4) * 3, 0, 0}, + outDesc, pitchDst, copyExtent); + } catch (sycl::exception e) { + sycl::free(imgMemSrc, q); + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + } + + sycl::free(imgMemSrc, q); + return false; +} + +bool check_test(const std::vector &out, + const std::vector &expected) { + assert(out.size() == expected.size()); + bool validated = true; + for (int i = 0; i < out.size(); i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + return validated; +} + +template +bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<1> dims) { + std::vector dataIn1(dims.size() / 2); + std::vector dataIn2(dims.size() / 2); + std::vector out(dims.size()); + + std::vector expected(dims.size()); + + // Create two sets of input data. Each half the size of the output + // and one beginning sequentually after the other. + std::iota(dataIn1.begin(), dataIn1.end(), 0); + std::iota(dataIn2.begin(), dataIn2.end(), (dataIn2.size())); + + // Set expected to be sequential + std::iota(expected.begin(), expected.end(), 0); + + syclexp::image_descriptor outDesc = + syclexp::image_descriptor(dims, channelNum, channelType); + syclexp::image_descriptor dataInDesc = + syclexp::image_descriptor(dims / 2, channelNum, channelType); + + bool validated = true; + + // Perform copy checks + copy_image_mem_handle_to_image_mem_handle(dataInDesc, outDesc, dataIn1, + dataIn2, dev, q, out); + + validated = validated && check_test(out, expected); + + copy_image_mem_handle_to_usm(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, + out); + + validated = validated && check_test(out, expected); + + copy_usm_to_image_mem_handle(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, + out); + + validated = validated && check_test(out, expected); + + copy_usm_to_usm(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); + + validated = validated && check_test(out, expected); + + // Perform out of bounds copy checks + validated = + validated && image_mem_handle_to_image_mem_handle_out_of_bounds_copy( + dataInDesc, outDesc, dataIn1, dev, q); + + validated = validated && image_mem_handle_to_usm_out_of_bounds_copy( + dataInDesc, outDesc, dataIn1, dev, q); + + validated = validated && usm_to_image_mem_handle_out_of_bounds_copy( + dataInDesc, outDesc, dataIn1, dev, q); + + validated = validated && usm_to_usm_out_of_bounds_copy(dataInDesc, outDesc, + dataIn1, dev, q); + + return validated; +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + + bool validated = + run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12}); + + if (!validated) { + std::cout << "Tests failed\n"; + return 1; + } + + std::cout << "Tests passed\n"; + + return 0; +} diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp new file mode 100644 index 0000000000000..d13a9146b2784 --- /dev/null +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp @@ -0,0 +1,411 @@ +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +namespace syclexp = sycl::ext::oneapi::experimental; + +void copy_image_mem_handle_to_image_mem_handle( + const syclexp::image_descriptor &desc, const std::vector &dataIn, + sycl::device dev, sycl::queue q, std::vector &out) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + // Copy four quarters of square into output image + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, + imgMemDst.get_handle(), {0, 0, 0}, desc, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, + imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + + q.wait_and_throw(); +} + +void copy_image_mem_handle_to_usm(const syclexp::image_descriptor &desc, + const std::vector &dataIn, + sycl::device dev, sycl::queue q, + std::vector &out) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + + // Allocate 2D device USM memory + size_t pitch = 0; + void *imgMemDst = syclexp::pitched_alloc_device(&pitch, desc, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + // Copy four quarters of square into output image + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, imgMemDst, + {0, 0, 0}, desc, pitch, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, + imgMemDst, {desc.width / 2, 0, 0}, desc, pitch, copyExtent); + + q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, + imgMemDst, {0, desc.height / 2, 0}, desc, pitch, + copyExtent); + + q.ext_oneapi_copy( + imgMemSrc.get_handle(), {desc.width / 2, desc.height / 2, 0}, desc, + imgMemDst, {desc.width / 2, desc.height / 2, 0}, desc, pitch, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst, out.data(), desc, pitch); + + q.wait_and_throw(); + + sycl::free(imgMemDst, q); +} + +void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &desc, + const std::vector &dataIn, + sycl::device dev, sycl::queue q, + std::vector &out) { + // Allocate 2D device USM memory + size_t pitch = 0; + void *imgMemSrc = syclexp::pitched_alloc_device(&pitch, desc, q); + + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc, desc, pitch); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + // Copy four quarters of square into output image + q.ext_oneapi_copy(imgMemSrc, {0, 0, 0}, desc, pitch, imgMemDst.get_handle(), + {0, 0, 0}, desc, copyExtent); + + q.ext_oneapi_copy(imgMemSrc, {desc.width / 2, 0, 0}, desc, pitch, + imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc, {0, desc.height / 2, 0}, desc, pitch, + imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc, {desc.width / 2, desc.height / 2, 0}, desc, + pitch, imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + + q.wait_and_throw(); + + sycl::free(imgMemSrc, q); +} + +void copy_usm_to_usm(const syclexp::image_descriptor &desc, + const std::vector &dataIn, sycl::device dev, + sycl::queue q, std::vector &out) { + // Allocate 2D device USM memory + size_t pitchSrc = 0; + void *imgMemSrc = syclexp::pitched_alloc_device(&pitchSrc, desc, q); + + size_t pitchDst = 0; + void *imgMemDst = syclexp::pitched_alloc_device(&pitchDst, desc, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc, desc, pitchSrc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + // Copy four quarters of square into output image + q.ext_oneapi_copy(imgMemSrc, {0, 0, 0}, desc, pitchSrc, imgMemDst, {0, 0, 0}, + desc, pitchDst, copyExtent); + + q.ext_oneapi_copy(imgMemSrc, {desc.width / 2, 0, 0}, desc, pitchSrc, + imgMemDst, {desc.width / 2, 0, 0}, desc, pitchDst, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc, {0, desc.height / 2, 0}, desc, pitchSrc, + imgMemDst, {0, desc.height / 2, 0}, desc, pitchDst, + copyExtent); + + q.ext_oneapi_copy(imgMemSrc, {desc.width / 2, desc.height / 2, 0}, desc, + pitchSrc, imgMemDst, {desc.width / 2, desc.height / 2, 0}, + desc, pitchDst, copyExtent); + + q.wait_and_throw(); + + // Copy out data to host + q.ext_oneapi_copy(imgMemDst, out.data(), desc, pitchDst); + + q.wait_and_throw(); + + sycl::free(imgMemSrc, q); + sycl::free(imgMemDst, q); +} + +bool image_mem_handle_to_image_mem_handle_out_of_bounds_copy( + const syclexp::image_descriptor &desc, const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy( + imgMemSrc.get_handle(), {desc.width / 2, desc.height / 2, 0}, desc, + imgMemDst.get_handle(), {desc.width / 2, (desc.height / 2) + 1, 0}, + desc, copyExtent); + } catch (sycl::exception e) { + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return false; + } + + return false; +} + +bool image_mem_handle_to_usm_out_of_bounds_copy( + const syclexp::image_descriptor &desc, const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); + + size_t pitch = 0; + void *imgMemDst = syclexp::pitched_alloc_device(&pitch, desc, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy(imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, imgMemDst, + {desc.width / 2, (desc.height / 2) + 1, 0}, desc, pitch, + copyExtent); + } catch (sycl::exception e) { + sycl::free(imgMemDst, q); + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + } + + sycl::free(imgMemDst, q); + + return false; +} + +bool usm_to_image_mem_handle_out_of_bounds_copy( + const syclexp::image_descriptor &desc, const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + + size_t pitch = 0; + void *imgMemSrc = syclexp::pitched_alloc_device(&pitch, desc, q); + + syclexp::image_mem imgMemDst(desc, dev, q.get_context()); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc, desc, pitch); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy(imgMemSrc, {desc.width / 2, desc.height / 2, 0}, desc, + pitch, imgMemDst.get_handle(), + {desc.width / 2, (desc.height / 2) + 1, 0}, desc, + copyExtent); + } catch (sycl::exception e) { + sycl::free(imgMemSrc, q); + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + } + + sycl::free(imgMemSrc, q); + + return false; +} + +bool usm_to_usm_out_of_bounds_copy(const syclexp::image_descriptor &desc, + const std::vector &dataIn, + sycl::device dev, sycl::queue q) { + + size_t pitchSrc = 0; + void *imgMemSrc = syclexp::pitched_alloc_device(&pitchSrc, desc, q); + + size_t pitchDst = 0; + void *imgMemDst = syclexp::pitched_alloc_device(&pitchDst, desc, q); + + // Copy input data to device + q.ext_oneapi_copy(dataIn.data(), imgMemSrc, desc, pitchSrc); + + q.wait_and_throw(); + + // Extent to copy + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + + try { + // Perform out of bound copy! + q.ext_oneapi_copy(imgMemSrc, {desc.width / 2, desc.height / 2, 0}, desc, + pitchSrc, imgMemDst, + {desc.width / 2, (desc.height / 2) + 1, 0}, desc, + pitchDst, copyExtent); + } catch (sycl::exception e) { + sycl::free(imgMemSrc, q); + return true; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + } + + sycl::free(imgMemSrc, q); + + return false; +} + +bool check_test(const std::vector &out, + const std::vector &expected) { + bool validated = true; + for (int i = 0; i < out.size(); i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + return validated; +} + +template +bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { + std::vector dataIn(dims.size()); + std::iota(dataIn.begin(), dataIn.end(), 0); + + std::vector expected(dims.size()); + std::iota(expected.begin(), expected.end(), 0); + + std::vector out(dims.size()); + + syclexp::image_descriptor desc = + syclexp::image_descriptor(dims, channelNum, channelType); + + bool validated = true; + + // Perform copy checks + copy_image_mem_handle_to_image_mem_handle(desc, dataIn, dev, q, out); + + validated = validated && check_test(out, expected); + + copy_image_mem_handle_to_usm(desc, dataIn, dev, q, out); + + validated = validated && check_test(out, expected); + + copy_usm_to_image_mem_handle(desc, dataIn, dev, q, out); + + validated = validated && check_test(out, expected); + + copy_usm_to_usm(desc, dataIn, dev, q, out); + + validated = validated && check_test(out, expected); + + // Perform out of bounds copy checks + validated = + validated && image_mem_handle_to_image_mem_handle_out_of_bounds_copy( + desc, dataIn, dev, q); + + validated = validated && + image_mem_handle_to_usm_out_of_bounds_copy(desc, dataIn, dev, q); + + validated = validated && + usm_to_image_mem_handle_out_of_bounds_copy(desc, dataIn, dev, q); + + validated = validated && usm_to_usm_out_of_bounds_copy(desc, dataIn, dev, q); + + return validated; +} + +int main() { + + sycl::device dev; + sycl::queue q(dev); + + bool validated = + run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12, 12}); + + if (!validated) { + std::cout << "Tests failed\n"; + return 1; + } + + std::cout << "Tests passed\n"; + + return 0; +} diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_3D_subregion.cpp similarity index 99% rename from sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp rename to sycl/test-e2e/bindless_images/copies/device_to_device_copy_3D_subregion.cpp index 680814bf6be77..cc9562d464227 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_copy_3D_subregion.cpp @@ -1,3 +1,4 @@ +// REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: cuda // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp new file mode 100644 index 0000000000000..7b7fb6f7e80a8 --- /dev/null +++ b/sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp @@ -0,0 +1,110 @@ +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + const size_t width = 3; + const size_t height = 5; + size_t N = width * height; + + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + + for (size_t i = 0; i < width; i++) { + for (size_t j = 0; j < height; j++) { + expected[i + (width * j)] = static_cast(i + (width * j)); + dataIn[i + (width * j)] = static_cast(N); + out[i + (width * j)] = static_cast(N); + } + } + + try { + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, 1, sycl::image_channel_type::fp32); + + size_t src_pitch = 0; + size_t dst_pitch = 0; + + // Extension: returns the device pointer to USM allocated pitched memory + void *srcMem = sycl::ext::oneapi::experimental::pitched_alloc_device( + &src_pitch, desc, q); + auto dstMem = sycl::ext::oneapi::experimental::pitched_alloc_device( + &dst_pitch, desc, q); + + if (srcMem == nullptr || dstMem == nullptr) { + std::cerr << "Error allocating memory!" << std::endl; + return 1; + } + + // Extension: copy pitched data to device USM + std::vector depEvents; + depEvents.push_back( + q.ext_oneapi_copy(expected.data(), srcMem, desc, src_pitch)); + // Extension: set incorrect data to dstMem to ensure later it is correctly + // overwritten + depEvents.push_back( + q.ext_oneapi_copy(dataIn.data(), dstMem, desc, dst_pitch)); + + // Extension: copy pitched data from device USM to device USM + depEvents = {q.ext_oneapi_copy(srcMem, desc, src_pitch, dstMem, desc, + dst_pitch, depEvents)}; + + // Extension: copy pitched data from device USM + q.ext_oneapi_copy(dstMem, out.data(), desc, dst_pitch, depEvents); + + q.wait_and_throw(); + sycl::free(srcMem, ctxt); + sycl::free(dstMem, ctxt); + } catch (sycl::exception &e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (size_t i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp new file mode 100644 index 0000000000000..37e58a3d75bef --- /dev/null +++ b/sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp @@ -0,0 +1,127 @@ +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm +// REQUIRES: cuda + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include +#include +#include + +// Uncomment to print additional test information +// #define VERBOSE_PRINT + +int main() { + + sycl::device dev; + sycl::queue q(dev); + auto ctxt = q.get_context(); + const size_t width = 4; + const size_t height = 5; + size_t N = width * height; + size_t widthInBytes = width * sizeof(float); + + std::vector out(N); + std::vector expected(N); + std::vector dataIn(N); + + for (size_t i = 0; i < width; i++) { + for (size_t j = 0; j < height; j++) { + expected[i + (width * j)] = static_cast(i + (width * j)); + dataIn[i + (width * j)] = static_cast(N); + out[i + (width * j)] = static_cast(N); + } + } + + try { + auto devicePitchAlign = dev.get_info< + sycl::ext::oneapi::experimental::info::device::image_row_pitch_align>(); + auto deviceMaxPitch = + dev.get_info(); + + // Pitch requirements: + // - pitch % devicePitchAlign == 0 + // - pitch >= widthInBytes + // - pitch <= deviceMaxPitch + size_t pitch = + devicePitchAlign * + static_cast(std::ceil(static_cast(widthInBytes) / + static_cast(devicePitchAlign))); + assert(pitch <= deviceMaxPitch); + assert((devicePitchAlign & devicePitchAlign >> 1) == 0); + + void *srcMem = + sycl::aligned_alloc_host(devicePitchAlign, (pitch * height), ctxt); + void *dstMem = + sycl::aligned_alloc_host(devicePitchAlign, (pitch * height), ctxt); + if (srcMem == nullptr || dstMem == nullptr) { + std::cerr << "Error allocating memory!" << std::endl; + return 1; + } + + // Extension: image descriptor + sycl::ext::oneapi::experimental::image_descriptor desc( + {width, height}, 1, sycl::image_channel_type::fp32); + + // Copy pitched data to host USM + std::vector depEvents; + depEvents.push_back(q.ext_oneapi_memcpy2d(srcMem, devicePitchAlign, + expected.data(), widthInBytes, + widthInBytes, height)); + // Set incorrect data to dstMem to ensure later it is correctly + // overwritten + depEvents.push_back(q.ext_oneapi_memcpy2d(dstMem, devicePitchAlign, + dataIn.data(), widthInBytes, + widthInBytes, height)); + + // Extension: copy pitched data from device USM to device USM + depEvents = { + q.ext_oneapi_copy(srcMem, desc, pitch, dstMem, desc, pitch, depEvents)}; + + // Copy pitched data from host USM + q.ext_oneapi_memcpy2d(out.data(), widthInBytes, dstMem, devicePitchAlign, + widthInBytes, height, depEvents); + + q.wait_and_throw(); + sycl::free(srcMem, ctxt); + sycl::free(dstMem, ctxt); + } catch (sycl::exception &e) { + std::cerr << "SYCL exception caught! : " << e.what() << "\n"; + return 1; + } catch (...) { + std::cerr << "Unknown exception caught!\n"; + return 2; + } + + // collect and validate output + bool validated = true; + for (size_t i = 0; i < N; i++) { + bool mismatch = false; + if (out[i] != expected[i]) { + mismatch = true; + validated = false; + } + + if (mismatch) { +#ifdef VERBOSE_PRINT + std::cout << "Result mismatch! Expected: " << expected[i] + << ", Actual: " << out[i] << std::endl; +#else + break; +#endif + } + } + if (validated) { + std::cout << "Test passed!" << std::endl; + return 0; + } + + std::cout << "Test failed!" << std::endl; + return 3; +} diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy.cpp deleted file mode 100644 index 4a9263e44a13e..0000000000000 --- a/sycl/test-e2e/bindless_images/device_to_device_copy.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// REQUIRES: cuda - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -#include -#include -#include -#include - -// Uncomment to print additional test information -// #define VERBOSE_PRINT - -namespace syclexp = sycl::ext::oneapi::experimental; - -void copy_image_mem_handle_to_image_mem_handle( - syclexp::image_descriptor &desc, const std::vector &testData, - sycl::device dev, sycl::queue q, std::vector &out) { - syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); - syclexp::image_mem imgMemDst(desc, dev, q.get_context()); - - q.ext_oneapi_copy((void *)testData.data(), imgMemSrc.get_handle(), desc); - q.wait_and_throw(); - - q.ext_oneapi_copy(imgMemSrc.get_handle(), imgMemDst.get_handle(), desc); - q.wait_and_throw(); - - q.ext_oneapi_copy(imgMemDst.get_handle(), (void *)out.data(), desc); - q.wait_and_throw(); -} - -bool check_test(const std::vector &out, - const std::vector &expected) { - assert(out.size() == expected.size()); - bool validated = true; - for (int i = 0; i < out.size(); i++) { - bool mismatch = false; - if (out[i] != expected[i]) { - mismatch = true; - validated = false; - } - - if (mismatch) { -#ifdef VERBOSE_PRINT - std::cout << "Result mismatch! Expected: " << expected[i] - << ", Actual: " << out[i] << std::endl; -#else - break; -#endif - } - } - return validated; -} - -template -bool run_copy_test_with(sycl::device &dev, sycl::queue &q, - sycl::range dims) { - std::vector dataSequence(dims.size()); - std::vector out(dims.size()); - - std::vector expected(dims.size()); - - std::iota(dataSequence.begin(), dataSequence.end(), 0); - std::iota(expected.begin(), expected.end(), 0); - - syclexp::image_descriptor desc; - - if constexpr (type == syclexp::image_type::standard) { - desc = syclexp::image_descriptor(dims, channelNum, channelType); - } else { - desc = syclexp::image_descriptor( - {dims[0], dim > 2 ? dims[1] : 0}, channelNum, channelType, - syclexp::image_type::array, 1, dim > 2 ? dims[2] : dims[1]); - } - - copy_image_mem_handle_to_image_mem_handle(desc, dataSequence, dev, q, out); - - return check_test(out, expected); -} - -int main() { - - sycl::device dev; - sycl::queue q(dev); - auto ctxt = q.get_context(); - - // Standard images copies - bool validated = run_copy_test_with<1, sycl::image_channel_type::fp32, 2>( - dev, q, {2048, 2048}); - - validated &= run_copy_test_with<1, sycl::image_channel_type::fp32, 1>( - dev, q, {512 * 4}); - - validated &= run_copy_test_with<1, sycl::image_channel_type::fp32, 3>( - dev, q, {256, 256, 4}); - - // Layered images copies - validated &= - run_copy_test_with<1, sycl::image_channel_type::fp32, 2, - syclexp::image_type::array>(dev, q, {956, 38}); - validated &= - run_copy_test_with<1, sycl::image_channel_type::fp32, 3, - syclexp::image_type::array>(dev, q, {256, 256, 4}); - - if (!validated) { - std::cout << "Tests failed"; - return 1; - } - - std::cout << "Tests passed"; - - return 0; -} diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp deleted file mode 100644 index 250195358011a..0000000000000 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp +++ /dev/null @@ -1,172 +0,0 @@ -// REQUIRES: cuda - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -#include -#include -#include -#include - -// Uncomment to print additional test information -// #define VERBOSE_PRINT - -namespace syclexp = sycl::ext::oneapi::experimental; - -void copy_image_mem_handle_to_image_mem_handle( - const syclexp::image_descriptor &dataInDesc, - const syclexp::image_descriptor &outDesc, - const std::vector &dataIn1, const std::vector &dataIn2, - sycl::device dev, sycl::queue q, std::vector &out) { - - // Check that output image is double size of input images - assert(outDesc.width == dataInDesc.width * 2); - - syclexp::image_mem imgMemSrc1(dataInDesc, dev, q.get_context()); - syclexp::image_mem imgMemSrc2(dataInDesc, dev, q.get_context()); - syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); - - // Copy input data to device - q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1.get_handle(), dataInDesc); - q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2.get_handle(), dataInDesc); - - q.wait_and_throw(); - - // Extent to copy - sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; - - // Copy first half of imgMemSrcOne to first quarter of imgMemDst - q.ext_oneapi_copy(imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, - imgMemDst.get_handle(), {0, 0, 0}, outDesc, copyExtent); - - // Copy second half of imgMemSrcOne to second quarter of imgMemDst - q.ext_oneapi_copy(imgMemSrc1.get_handle(), {dataInDesc.width / 2, 0, 0}, - dataInDesc, imgMemDst.get_handle(), - {outDesc.width / 4, 0, 0}, outDesc, copyExtent); - - // Copy first half of imgMemSrcTwo to third quarter of imgMemDst - q.ext_oneapi_copy(imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, - imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, - copyExtent); - - // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst - q.ext_oneapi_copy(imgMemSrc2.get_handle(), {dataInDesc.width / 2, 0, 0}, - dataInDesc, imgMemDst.get_handle(), - {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); - - q.wait_and_throw(); - - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), outDesc); - - q.wait_and_throw(); -} - -bool out_of_bounds_copy(const syclexp::image_descriptor &dataInDesc, - const syclexp::image_descriptor &outDesc, - const std::vector &dataIn, sycl::device dev, - sycl::queue q) { - - // Check that output image is double size of input images - assert(outDesc.width == dataInDesc.width * 2); - - syclexp::image_mem imgMemSrc(dataInDesc, dev, q.get_context()); - syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); - - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), dataInDesc); - - q.wait_and_throw(); - - // Extent to copy - sycl::range copyExtent = {(dataInDesc.width / 2) + 1, 1, 1}; - - try { - // Perform out of bound copy! - q.ext_oneapi_copy(imgMemSrc.get_handle(), {dataInDesc.width / 2, 0, 0}, - dataInDesc, imgMemDst.get_handle(), - {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); - } catch (sycl::exception e) { - return true; - } catch (...) { - std::cerr << "Unknown exception caught!\n"; - return false; - } - - return false; -} - -bool check_test(const std::vector &out, - const std::vector &expected) { - assert(out.size() == expected.size()); - bool validated = true; - for (int i = 0; i < out.size(); i++) { - bool mismatch = false; - if (out[i] != expected[i]) { - mismatch = true; - validated = false; - } - - if (mismatch) { -#ifdef VERBOSE_PRINT - std::cout << "Result mismatch! Expected: " << expected[i] - << ", Actual: " << out[i] << std::endl; -#else - break; -#endif - } - } - return validated; -} - -template -bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<1> dims) { - std::vector dataIn1(dims.size() / 2); - std::vector dataIn2(dims.size() / 2); - std::vector out(dims.size()); - - std::vector expected(dims.size()); - - // Create two sets of input data. Each half the size of the output - // and one beginning sequentually after the other. - std::iota(dataIn1.begin(), dataIn1.end(), 0); - std::iota(dataIn2.begin(), dataIn2.end(), (dataIn2.size())); - - // Set expected to be sequential - std::iota(expected.begin(), expected.end(), 0); - - syclexp::image_descriptor outDesc = - syclexp::image_descriptor(dims, channelNum, channelType); - syclexp::image_descriptor dataInDesc = - syclexp::image_descriptor(dims / 2, channelNum, channelType); - - // Perform copy - copy_image_mem_handle_to_image_mem_handle(dataInDesc, outDesc, dataIn1, - dataIn2, dev, q, out); - - bool copyValidated = check_test(out, expected); - - bool exceptionValidated = - out_of_bounds_copy(dataInDesc, outDesc, dataIn1, dev, q); - - return copyValidated && exceptionValidated; -} - -int main() { - - sycl::device dev; - sycl::queue q(dev); - - bool validated = - run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12}); - - if (!validated) { - std::cout << "Tests failed\n"; - return 1; - } - - std::cout << "Tests passed\n"; - - return 0; -} diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp deleted file mode 100644 index 0dea97a3f745e..0000000000000 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp +++ /dev/null @@ -1,147 +0,0 @@ -// REQUIRES: cuda - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -#include -#include -#include -#include - -// Uncomment to print additional test information -// #define VERBOSE_PRINT - -namespace syclexp = sycl::ext::oneapi::experimental; - -void copy_image_mem_handle_to_image_mem_handle( - const syclexp::image_descriptor &desc, const std::vector &dataIn, - sycl::device dev, sycl::queue q, std::vector &out) { - syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); - syclexp::image_mem imgMemDst(desc, dev, q.get_context()); - - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); - - q.wait_and_throw(); - - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; - - // Copy four quarters of square into output image - q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, - imgMemDst.get_handle(), {0, 0, 0}, desc, copyExtent); - - q.ext_oneapi_copy(imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, - imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, - copyExtent); - - q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, - imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, - copyExtent); - - q.ext_oneapi_copy(imgMemSrc.get_handle(), - {desc.width / 2, desc.height / 2, 0}, desc, - imgMemDst.get_handle(), - {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); - - q.wait_and_throw(); - - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); - - q.wait_and_throw(); -} - -bool out_of_bounds_copy(const syclexp::image_descriptor &desc, - const std::vector &dataIn, sycl::device dev, - sycl::queue q) { - syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); - syclexp::image_mem imgMemDst(desc, dev, q.get_context()); - - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); - - q.wait_and_throw(); - - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; - - try { - // Perform out of bound copy! - q.ext_oneapi_copy( - imgMemSrc.get_handle(), {desc.width / 2, desc.height / 2, 0}, desc, - imgMemDst.get_handle(), {desc.width / 2, (desc.height / 2) + 1, 0}, - desc, copyExtent); - } catch (sycl::exception e) { - return true; - } catch (...) { - std::cerr << "Unknown exception caught!\n"; - return false; - } - - return false; -} - -bool check_test(const std::vector &out, - const std::vector &expected) { - bool validated = true; - for (int i = 0; i < out.size(); i++) { - bool mismatch = false; - if (out[i] != expected[i]) { - mismatch = true; - validated = false; - } - - if (mismatch) { -#ifdef VERBOSE_PRINT - std::cout << "Result mismatch! Expected: " << expected[i] - << ", Actual: " << out[i] << std::endl; -#else - break; -#endif - } - } - return validated; -} - -template -bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { - std::vector dataIn(dims.size()); - std::iota(dataIn.begin(), dataIn.end(), 0); - - std::vector expected(dims.size()); - std::iota(expected.begin(), expected.end(), 0); - - std::vector out(dims.size()); - - syclexp::image_descriptor desc = - syclexp::image_descriptor(dims, channelNum, channelType); - - // Perform copy - copy_image_mem_handle_to_image_mem_handle(desc, dataIn, dev, q, out); - - bool copyValidated = check_test(out, expected); - - bool exceptionValidated = out_of_bounds_copy(desc, dataIn, dev, q); - - return copyValidated && exceptionValidated; -} - -int main() { - - sycl::device dev; - sycl::queue q(dev); - - bool validated = - run_copy_test<1, sycl::image_channel_type::fp32>(dev, q, {12, 12}); - - if (!validated) { - std::cout << "Tests failed\n"; - return 1; - } - - std::cout << "Tests passed\n"; - - return 0; -} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6c5d235be24c7..83edad971c869 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3498,13 +3498,19 @@ _ZN4sycl3_V17handler13getKernelNameEv _ZN4sycl3_V17handler14addAccessorReqESt10shared_ptrINS0_6detail16AccessorImplHostEE _ZN4sycl3_V17handler14setNDRangeUsedEb _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorEPvS7_S7_S7_ +_ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorEPvS7_SA_mS7_ _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleENS0_5rangeILi3EEERKNS4_16image_descriptorES5_S7_SA_S7_ _ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleEPvRKNS4_16image_descriptorE -_ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleES5_RKNS4_16image_descriptorE +_ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleERKNS4_16image_descriptorEPvS8_m +_ZN4sycl3_V17handler15ext_oneapi_copyENS0_3ext6oneapi12experimental16image_mem_handleERKNS4_16image_descriptorES5_S8_ _ZN4sycl3_V17handler15ext_oneapi_copyEPKvNS0_3ext6oneapi12experimental16image_mem_handleERKNS6_16image_descriptorE _ZN4sycl3_V17handler15ext_oneapi_copyEPKvNS0_5rangeILi3EEEPvS5_RKNS0_3ext6oneapi12experimental16image_descriptorEmS5_S5_ +_ZN4sycl3_V17handler15ext_oneapi_copyEPKvNS0_5rangeILi3EEERKNS0_3ext6oneapi12experimental16image_descriptorEmNS8_16image_mem_handleES5_SB_S5_ +_ZN4sycl3_V17handler15ext_oneapi_copyEPKvNS0_5rangeILi3EEERKNS0_3ext6oneapi12experimental16image_descriptorEmPvS5_SB_mS5_ _ZN4sycl3_V17handler15ext_oneapi_copyEPKvNS0_5rangeILi3EEES5_NS0_3ext6oneapi12experimental16image_mem_handleES5_RKNS8_16image_descriptorES5_ _ZN4sycl3_V17handler15ext_oneapi_copyEPKvPvRKNS0_3ext6oneapi12experimental16image_descriptorEm +_ZN4sycl3_V17handler15ext_oneapi_copyEPKvRKNS0_3ext6oneapi12experimental16image_descriptorEmNS6_16image_mem_handleES9_ +_ZN4sycl3_V17handler15ext_oneapi_copyEPKvRKNS0_3ext6oneapi12experimental16image_descriptorEmPvS9_m _ZN4sycl3_V17handler16ext_oneapi_graphENS0_3ext6oneapi12experimental13command_graphILNS4_11graph_stateE1EEE _ZN4sycl3_V17handler16getMaxWorkGroupsEv _ZN4sycl3_V17handler17supportsUSMFill2DEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ec729a69bfad3..edc7bcd617733 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3802,14 +3802,26 @@ ?ext_oneapi_can_compile@device@_V1@sycl@@QEAA_NW4source_language@experimental@oneapi@ext@23@@Z ?ext_oneapi_cl_profile@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?ext_oneapi_cl_profile_impl@device@_V1@sycl@@AEBA?AVstring@detail@23@XZ +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX12@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@56723@1@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_K@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXUimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXV?$range@$02@23@1Uimage_mem_handle@experimental@oneapi@ext@23@1AEBUimage_descriptor@67823@1@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX1231@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@67823@121@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXPEBXV?$range@$02@23@PEAX1AEBUimage_descriptor@experimental@oneapi@ext@23@_K11@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@56723@PEAX1_K@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@56723@U456723@1@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@56723@@Z -?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@U456723@AEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@PEAX111@Z +?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@PEAX12_K1@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@U456723@121@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX12AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX12AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX12V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@67823@1AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@67823@1AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXAEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@67823@1V423@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXPEAXAEBUimage_descriptor@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z @@ -3819,18 +3831,30 @@ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@1Uimage_mem_handle@experimental@oneapi@ext@23@1AEBUimage_descriptor@78923@1AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@1Uimage_mem_handle@experimental@oneapi@ext@23@1AEBUimage_descriptor@78923@1AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@1Uimage_mem_handle@experimental@oneapi@ext@23@1AEBUimage_descriptor@78923@1V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX1231AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX1231AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KPEAX1231V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@78923@121AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@78923@121AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@AEBUimage_descriptor@experimental@oneapi@ext@23@_KUimage_mem_handle@78923@121V423@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@PEAX1AEBUimage_descriptor@experimental@oneapi@ext@23@_K11AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@PEAX1AEBUimage_descriptor@experimental@oneapi@ext@23@_K11AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEBXV?$range@$02@23@PEAX1AEBUimage_descriptor@experimental@oneapi@ext@23@_K11V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@67823@PEAX1_KAEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@67823@PEAX1_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@67823@PEAX1_KV423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@67823@U567823@1AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@67823@U567823@1AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@AEBUimage_descriptor@67823@U567823@1V423@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@67823@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@67823@AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@67823@V423@AEBUcode_location@detail@23@@Z -?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@U567823@AEBUimage_descriptor@67823@AEBUcode_location@detail@23@@Z -?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@U567823@AEBUimage_descriptor@67823@AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z -?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@U567823@AEBUimage_descriptor@67823@V423@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111V423@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX12_K1AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX12_K1AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z +?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX12_K1V423@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@U567823@121AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@U567823@121AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@U567823@121V423@AEBUcode_location@detail@23@@Z