Skip to content

Commit 10b0489

Browse files
committed
[HIP] Various fixes for urEnqueueMemImage(Read|Write|Copy)
It appears that several width/height/depth parameters were mixed up. This brings the HIP adapter in line with the CUDA adapter, and fixes several UR CTS tests. It also adds missing support for 1D image operations. It doesn't optimize them in the same way that the CUDA adapater appears to (using memcpy operations) but that can be left for future work.
1 parent 39cb69a commit 10b0489

File tree

2 files changed

+12
-38
lines changed

2 files changed

+12
-38
lines changed

source/adapters/hip/enqueue.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -948,15 +948,15 @@ static ur_result_t commonEnqueueMemImageNDCopy(
948948
UR_ASSERT(DstType == hipMemoryTypeArray || DstType == hipMemoryTypeHost,
949949
UR_RESULT_ERROR_INVALID_VALUE);
950950

951-
if (ImgType == UR_MEM_TYPE_IMAGE2D) {
951+
if (ImgType == UR_MEM_TYPE_IMAGE1D || ImgType == UR_MEM_TYPE_IMAGE2D) {
952952
hip_Memcpy2D CpyDesc;
953953
memset(&CpyDesc, 0, sizeof(CpyDesc));
954954
CpyDesc.srcMemoryType = SrcType;
955955
if (SrcType == hipMemoryTypeArray) {
956956
CpyDesc.srcArray =
957957
reinterpret_cast<hipCUarray>(const_cast<void *>(SrcPtr));
958958
CpyDesc.srcXInBytes = SrcOffset[0];
959-
CpyDesc.srcY = SrcOffset[1];
959+
CpyDesc.srcY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : SrcOffset[1];
960960
} else {
961961
CpyDesc.srcHost = SrcPtr;
962962
}
@@ -965,12 +965,12 @@ static ur_result_t commonEnqueueMemImageNDCopy(
965965
CpyDesc.dstArray =
966966
reinterpret_cast<hipCUarray>(const_cast<void *>(DstPtr));
967967
CpyDesc.dstXInBytes = DstOffset[0];
968-
CpyDesc.dstY = DstOffset[1];
968+
CpyDesc.dstY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : DstOffset[1];
969969
} else {
970970
CpyDesc.dstHost = DstPtr;
971971
}
972972
CpyDesc.WidthInBytes = Region[0];
973-
CpyDesc.Height = Region[1];
973+
CpyDesc.Height = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 1 : Region[1];
974974
UR_CHECK_ERROR(hipMemcpyParam2DAsync(&CpyDesc, HipStream));
975975
return UR_RESULT_SUCCESS;
976976
}
@@ -1052,11 +1052,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
10521052
int ElementByteSize = imageElementByteSize(Format);
10531053

10541054
size_t ByteOffsetX = origin.x * ElementByteSize * NumChannels;
1055-
size_t BytesToCopy = ElementByteSize * NumChannels * region.depth;
1055+
size_t BytesToCopy = ElementByteSize * NumChannels * region.width;
10561056

10571057
auto ImgType = std::get<SurfaceMem>(hImage->Mem).getImageType();
10581058

1059-
size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.height};
1059+
size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.depth};
10601060
size_t SrcOffset[3] = {ByteOffsetX, origin.y, origin.z};
10611061

10621062
std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
@@ -1113,11 +1113,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
11131113
int ElementByteSize = imageElementByteSize(Format);
11141114

11151115
size_t ByteOffsetX = origin.x * ElementByteSize * NumChannels;
1116-
size_t BytesToCopy = ElementByteSize * NumChannels * region.depth;
1116+
size_t BytesToCopy = ElementByteSize * NumChannels * region.width;
11171117

11181118
auto ImgType = std::get<SurfaceMem>(hImage->Mem).getImageType();
11191119

1120-
size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.height};
1120+
size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.depth};
11211121
size_t DstOffset[3] = {ByteOffsetX, origin.y, origin.z};
11221122

11231123
std::unique_ptr<ur_event_handle_t_> RetImplEvent{nullptr};
@@ -1186,13 +1186,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
11861186

11871187
int ElementByteSize = imageElementByteSize(SrcFormat);
11881188

1189-
size_t DstByteOffsetX = dstOrigin.x * ElementByteSize * SrcNumChannels;
1190-
size_t SrcByteOffsetX = srcOrigin.x * ElementByteSize * DstNumChannels;
1191-
size_t BytesToCopy = ElementByteSize * SrcNumChannels * region.depth;
1189+
size_t DstByteOffsetX = dstOrigin.x * ElementByteSize * DstNumChannels;
1190+
size_t SrcByteOffsetX = srcOrigin.x * ElementByteSize * SrcNumChannels;
1191+
size_t BytesToCopy = ElementByteSize * SrcNumChannels * region.width;
11921192

11931193
auto ImgType = std::get<SurfaceMem>(hImageSrc->Mem).getImageType();
11941194

1195-
size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.width};
1195+
size_t AdjustedRegion[3] = {BytesToCopy, region.height, region.depth};
11961196
size_t SrcOffset[3] = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
11971197
size_t DstOffset[3] = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};
11981198

test/conformance/enqueue/enqueue_adapter_hip.match

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,6 @@ urEnqueueMemBufferFillTest.Success/AMD_HIP_BACKEND___{{.*}}___size__256__pattern
99
urEnqueueMemBufferFillTest.Success/AMD_HIP_BACKEND___{{.*}}___size__1024__patternSize__256
1010
{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_row_2D
1111
{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_3d_2d
12-
urEnqueueMemImageCopyTest.Success/AMD_HIP_BACKEND___{{.*}}___1D
13-
urEnqueueMemImageCopyTest.Success/AMD_HIP_BACKEND___{{.*}}___2D
14-
urEnqueueMemImageCopyTest.Success/AMD_HIP_BACKEND___{{.*}}___3D
15-
urEnqueueMemImageCopyTest.SuccessPartialCopy/AMD_HIP_BACKEND___{{.*}}___1D
16-
urEnqueueMemImageCopyTest.SuccessPartialCopy/AMD_HIP_BACKEND___{{.*}}___2D
17-
urEnqueueMemImageCopyTest.SuccessPartialCopy/AMD_HIP_BACKEND___{{.*}}___3D
18-
urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/AMD_HIP_BACKEND___{{.*}}___1D
19-
urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/AMD_HIP_BACKEND___{{.*}}___2D
20-
urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/AMD_HIP_BACKEND___{{.*}}___3D
21-
urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/AMD_HIP_BACKEND___{{.*}}___1D
22-
urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/AMD_HIP_BACKEND___{{.*}}___2D
23-
urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/AMD_HIP_BACKEND___{{.*}}___3D
24-
urEnqueueMemImageCopyTest.InvalidNullHandleQueue/AMD_HIP_BACKEND___{{.*}}___1D
25-
urEnqueueMemImageCopyTest.InvalidNullHandleQueue/AMD_HIP_BACKEND___{{.*}}___3D
26-
urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/AMD_HIP_BACKEND___{{.*}}___1D
27-
urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/AMD_HIP_BACKEND___{{.*}}___3D
28-
urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/AMD_HIP_BACKEND___{{.*}}___1D
29-
urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/AMD_HIP_BACKEND___{{.*}}___3D
30-
urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/AMD_HIP_BACKEND___{{.*}}___1D
31-
urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/AMD_HIP_BACKEND___{{.*}}___3D
32-
urEnqueueMemImageCopyTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___1D
33-
urEnqueueMemImageCopyTest.InvalidSize/AMD_HIP_BACKEND___{{.*}}___3D
34-
urEnqueueMemImageReadTest.Success1D/AMD_HIP_BACKEND___{{.*}}_
35-
urEnqueueMemImageReadTest.Success3D/AMD_HIP_BACKEND___{{.*}}_
36-
urEnqueueMemImageWriteTest.Success1D/AMD_HIP_BACKEND___{{.*}}_
37-
urEnqueueMemImageWriteTest.Success3D/AMD_HIP_BACKEND___{{.*}}_
3812
urEnqueueUSMAdviseWithParamTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_ADVICE_FLAG_DEFAULT
3913
urEnqueueUSMAdviseTest.MultipleParamsSuccess/AMD_HIP_BACKEND___{{.*}}_
4014
urEnqueueUSMAdviseTest.NonCoherentDeviceMemorySuccessOrWarning/AMD_HIP_BACKEND___{{.*}}_

0 commit comments

Comments
 (0)