@@ -583,7 +583,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
583
583
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
584
584
UR_ASSERT ((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE ||
585
585
imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST ||
586
- imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE),
586
+ imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE ||
587
+ imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST),
587
588
UR_RESULT_ERROR_INVALID_VALUE);
588
589
UR_ASSERT (pSrcImageFormat->channelOrder == pDstImageFormat->channelOrder ,
589
590
UR_RESULT_ERROR_INVALID_ARGUMENT);
@@ -651,6 +652,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
651
652
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
652
653
cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
653
654
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
655
+ cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
656
+ cpy_desc.Height = pCopyRegion->copyExtent .height ;
654
657
cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes;
655
658
if (pDstImageDesc->rowPitch == 0 ) {
656
659
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -661,8 +664,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
661
664
cpy_desc.dstDevice = (CUdeviceptr)pDst;
662
665
cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
663
666
}
664
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
665
- cpy_desc.Height = pCopyRegion->copyExtent .height ;
666
667
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
667
668
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
668
669
CUDA_MEMCPY3D cpy_desc = {};
@@ -740,22 +741,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
740
741
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
741
742
cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
742
743
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
744
+ cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
745
+ cpy_desc.Height = pCopyRegion->copyExtent .height ;
746
+ cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes;
743
747
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
744
748
cpy_desc.dstHost = pDst;
745
749
if (pSrcImageDesc->rowPitch == 0 ) {
746
750
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
747
751
cpy_desc.srcArray = as_CUArray (pSrc);
748
752
} else {
749
753
// Pitched memory
750
- cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE;
751
754
cpy_desc.srcPitch = pSrcImageDesc->rowPitch ;
755
+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE;
752
756
cpy_desc.srcDevice = (CUdeviceptr)pSrc;
753
757
}
754
- cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
755
- cpy_desc.dstHost = pDst;
756
- cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes;
757
- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
758
- cpy_desc.Height = pCopyRegion->copyExtent .height ;
759
758
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
760
759
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
761
760
CUDA_MEMCPY3D cpy_desc = {};
@@ -797,7 +796,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
797
796
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
798
797
}
799
798
} else {
800
- // imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE
799
+ // imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE ||
800
+ // imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST
801
801
802
802
// we don't support copying between different image types.
803
803
if (pSrcImageDesc->type != pDstImageDesc->type ) {
@@ -810,30 +810,67 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
810
810
// synchronous because of the explicit call to cuStreamSynchronize at
811
811
// the end
812
812
if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
813
+ // Check what type of memory pSrc and pDst are to set the correct
814
+ // attributes of cpy_desc.
815
+ // If cuPointerGetAttribute returns something different from
816
+ // CUDA_SUCCESS then we know that the memory type is a CuArray.
817
+ // Otherwise, it's CU_MEMORYTYPE_DEVICE.
818
+ CUmemorytype memType;
819
+ bool isSrcCudaArray =
820
+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
821
+ (CUdeviceptr)pSrc) != CUDA_SUCCESS;
822
+ bool isDstCudaArray =
823
+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
824
+ (CUdeviceptr)pDst) != CUDA_SUCCESS;
825
+
813
826
CUDA_MEMCPY2D cpy_desc = {};
814
827
cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes;
815
828
cpy_desc.srcY = 0 ;
816
829
cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
817
830
cpy_desc.dstY = 0 ;
818
- cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
819
- cpy_desc.srcArray = as_CUArray (pSrc);
820
- cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
821
- cpy_desc.dstArray = (CUarray)pDst;
822
831
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
823
832
cpy_desc.Height = 1 ;
833
+ if (isSrcCudaArray) {
834
+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
835
+ cpy_desc.srcArray = as_CUArray (pSrc);
836
+ } else {
837
+ getUSMHostOrDevicePtr (pSrc, &cpy_desc.srcMemoryType ,
838
+ &cpy_desc.srcDevice , &cpy_desc.srcHost );
839
+ }
840
+ if (isDstCudaArray) {
841
+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
842
+ cpy_desc.dstArray = (CUarray)pDst;
843
+ } else {
844
+ getUSMHostOrDevicePtr (pDst, &cpy_desc.dstMemoryType ,
845
+ &cpy_desc.dstDevice , &cpy_desc.dstHost );
846
+ }
824
847
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
825
848
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
826
849
CUDA_MEMCPY2D cpy_desc = {};
827
850
cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes;
828
851
cpy_desc.srcY = pCopyRegion->srcOffset .y ;
829
852
cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
830
853
cpy_desc.dstY = pCopyRegion->dstOffset .y ;
831
- cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
832
- cpy_desc.srcArray = as_CUArray (pSrc);
833
- cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
834
- cpy_desc.dstArray = (CUarray)pDst;
835
854
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
836
855
cpy_desc.Height = pCopyRegion->copyExtent .height ;
856
+ if (pSrcImageDesc->rowPitch == 0 ) {
857
+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
858
+ cpy_desc.srcArray = as_CUArray (pSrc);
859
+ } else {
860
+ // Pitched memory
861
+ cpy_desc.srcPitch = pSrcImageDesc->rowPitch ;
862
+ getUSMHostOrDevicePtr (pSrc, &cpy_desc.srcMemoryType ,
863
+ &cpy_desc.srcDevice , &cpy_desc.srcHost );
864
+ }
865
+ if (pDstImageDesc->rowPitch == 0 ) {
866
+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
867
+ cpy_desc.dstArray = (CUarray)pDst;
868
+ } else {
869
+ // Pitched memory
870
+ cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
871
+ getUSMHostOrDevicePtr (pDst, &cpy_desc.dstMemoryType ,
872
+ &cpy_desc.dstDevice , &cpy_desc.dstHost );
873
+ }
837
874
UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
838
875
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
839
876
CUDA_MEMCPY3D cpy_desc = {};
0 commit comments