Skip to content

Commit a172cde

Browse files
authored
Merge pull request #2249 from zhaomaosu/use-device-usm-for-rtl-data
[DeviceASAN] Use device usm to sync asan runtime data instead of shared usm
2 parents 1851eff + cde0d4c commit a172cde

File tree

4 files changed

+161
-99
lines changed

4 files changed

+161
-99
lines changed

source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -471,10 +471,9 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
471471
numEventsInWaitList, phEventWaitList, phEvent);
472472
}
473473

474-
USMLaunchInfo LaunchInfo(GetContext(hKernel), GetDevice(hQueue),
475-
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
476-
workDim);
477-
UR_CALL(LaunchInfo.initialize());
474+
LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue),
475+
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
476+
workDim);
478477

479478
UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));
480479

source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 76 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -249,15 +249,11 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
249249

250250
ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
251251
ur_queue_handle_t Queue,
252-
USMLaunchInfo &LaunchInfo) {
252+
LaunchInfo &LaunchInfo) {
253253
auto Context = GetContext(Queue);
254254
auto Device = GetDevice(Queue);
255255
auto ContextInfo = getContextInfo(Context);
256256
auto DeviceInfo = getDeviceInfo(Device);
257-
auto KernelInfo = getKernelInfo(Kernel);
258-
assert(KernelInfo && "Kernel should be instrumented");
259-
260-
UR_CALL(LaunchInfo.updateKernelInfo(*KernelInfo.get()));
261257

262258
ManagedQueue InternalQueue(Context, Device);
263259
if (!InternalQueue) {
@@ -275,12 +271,14 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
275271

276272
ur_result_t AsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
277273
ur_queue_handle_t Queue,
278-
USMLaunchInfo &LaunchInfo) {
274+
LaunchInfo &LaunchInfo) {
279275
// FIXME: We must use a blocking operation here until we support urEventSetCallback
280276
auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue);
281277

278+
UR_CALL(LaunchInfo.Data.syncFromDevice(Queue));
279+
282280
if (Result == UR_RESULT_SUCCESS) {
283-
for (const auto &Report : LaunchInfo.Data->Report) {
281+
for (const auto &Report : LaunchInfo.Data.Host.Report) {
284282
if (!Report.Flag) {
285283
continue;
286284
}
@@ -685,7 +683,7 @@ AsanInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
685683
ur_result_t AsanInterceptor::prepareLaunch(
686684
std::shared_ptr<ContextInfo> &ContextInfo,
687685
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
688-
ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) {
686+
ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {
689687

690688
do {
691689
auto KernelInfo = getKernelInfo(Kernel);
@@ -721,27 +719,20 @@ ur_result_t AsanInterceptor::prepareLaunch(
721719
}
722720
}
723721

724-
// Set launch info argument
725722
auto ArgNums = GetKernelNumArgs(Kernel);
723+
// We must prepare all kernel args before calling
724+
// urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
725+
// CPU device.
726726
if (ArgNums) {
727-
getContext()->logger.debug(
728-
"launch_info {} (numLocalArgs={}, localArgs={})",
729-
(void *)LaunchInfo.Data, LaunchInfo.Data->NumLocalArgs,
730-
(void *)LaunchInfo.Data->LocalArgs);
731727
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
732-
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data);
728+
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr());
733729
if (URes != UR_RESULT_SUCCESS) {
734730
getContext()->logger.error("Failed to set launch info: {}",
735731
URes);
736732
return URes;
737733
}
738734
}
739735

740-
LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
741-
LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
742-
LaunchInfo.Data->DeviceTy = DeviceInfo->Type;
743-
LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0;
744-
745736
if (LaunchInfo.LocalWorkSize.empty()) {
746737
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
747738
auto URes =
@@ -768,6 +759,14 @@ ur_result_t AsanInterceptor::prepareLaunch(
768759
LocalWorkSize[Dim];
769760
}
770761

762+
// Prepare asan runtime data
763+
LaunchInfo.Data.Host.GlobalShadowOffset =
764+
DeviceInfo->Shadow->ShadowBegin;
765+
LaunchInfo.Data.Host.GlobalShadowOffsetEnd =
766+
DeviceInfo->Shadow->ShadowEnd;
767+
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
768+
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;
769+
771770
auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
772771
Device = DeviceInfo->Handle,
773772
Queue](size_t Size, uptr &Ptr) {
@@ -816,7 +815,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
816815

817816
if (EnqueueAllocateShadowMemory(
818817
LocalShadowMemorySize,
819-
LaunchInfo.Data->LocalShadowOffset) !=
818+
LaunchInfo.Data.Host.LocalShadowOffset) !=
820819
UR_RESULT_SUCCESS) {
821820
getContext()->logger.warning(
822821
"Failed to allocate shadow memory for local "
@@ -827,25 +826,25 @@ ur_result_t AsanInterceptor::prepareLaunch(
827826
"Skip checking local memory of kernel <{}>",
828827
GetKernelName(Kernel));
829828
} else {
830-
LaunchInfo.Data->LocalShadowOffsetEnd =
831-
LaunchInfo.Data->LocalShadowOffset +
829+
LaunchInfo.Data.Host.LocalShadowOffsetEnd =
830+
LaunchInfo.Data.Host.LocalShadowOffset +
832831
LocalShadowMemorySize - 1;
833832

834833
ContextInfo->Stats.UpdateShadowMalloced(
835834
LocalShadowMemorySize);
836835

837836
getContext()->logger.info(
838837
"ShadowMemory(Local, {} - {})",
839-
(void *)LaunchInfo.Data->LocalShadowOffset,
840-
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
838+
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
839+
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
841840
}
842841
}
843842
}
844843

845844
// Write shadow memory offset for private memory
846845
if (getOptions().DetectPrivates) {
847846
if (DeviceInfo->Type == DeviceType::CPU) {
848-
LaunchInfo.Data->PrivateShadowOffset =
847+
LaunchInfo.Data.Host.PrivateShadowOffset =
849848
DeviceInfo->Shadow->ShadowBegin;
850849
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
851850
DeviceInfo->Type == DeviceType::GPU_DG2) {
@@ -858,7 +857,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
858857

859858
if (EnqueueAllocateShadowMemory(
860859
PrivateShadowMemorySize,
861-
LaunchInfo.Data->PrivateShadowOffset) !=
860+
LaunchInfo.Data.Host.PrivateShadowOffset) !=
862861
UR_RESULT_SUCCESS) {
863862
getContext()->logger.warning(
864863
"Failed to allocate shadow memory for private "
@@ -869,20 +868,41 @@ ur_result_t AsanInterceptor::prepareLaunch(
869868
"Skip checking private memory of kernel <{}>",
870869
GetKernelName(Kernel));
871870
} else {
872-
LaunchInfo.Data->PrivateShadowOffsetEnd =
873-
LaunchInfo.Data->PrivateShadowOffset +
871+
LaunchInfo.Data.Host.PrivateShadowOffsetEnd =
872+
LaunchInfo.Data.Host.PrivateShadowOffset +
874873
PrivateShadowMemorySize - 1;
875874

876875
ContextInfo->Stats.UpdateShadowMalloced(
877876
PrivateShadowMemorySize);
878877

879878
getContext()->logger.info(
880879
"ShadowMemory(Private, {} - {})",
881-
(void *)LaunchInfo.Data->PrivateShadowOffset,
882-
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
880+
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
881+
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
883882
}
884883
}
885884
}
885+
886+
// Write local arguments info
887+
if (!KernelInfo->LocalArgs.empty()) {
888+
std::vector<LocalArgsInfo> LocalArgsInfo;
889+
for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) {
890+
LocalArgsInfo.push_back(ArgInfo);
891+
getContext()->logger.debug(
892+
"local_args (argIndex={}, size={}, sizeWithRZ={})",
893+
ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone);
894+
}
895+
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
896+
}
897+
898+
// Sync asan runtime data to the device side
899+
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));
900+
901+
getContext()->logger.debug(
902+
"launch_info {} (numLocalArgs={}, localArgs={})",
903+
(void *)LaunchInfo.Data.getDevicePtr(),
904+
LaunchInfo.Data.Host.NumLocalArgs,
905+
(void *)LaunchInfo.Data.Host.LocalArgs);
886906
} while (false);
887907

888908
return UR_RESULT_SUCCESS;
@@ -942,63 +962,39 @@ ContextInfo::~ContextInfo() {
942962
}
943963
}
944964

945-
ur_result_t USMLaunchInfo::initialize() {
946-
UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context));
947-
UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device));
948-
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
949-
Context, Device, nullptr, nullptr, sizeof(LaunchInfo), (void **)&Data));
950-
*Data = LaunchInfo{};
951-
return UR_RESULT_SUCCESS;
952-
}
953-
954-
ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) {
955-
auto NumArgs = KI.LocalArgs.size();
956-
if (NumArgs) {
957-
Data->NumLocalArgs = NumArgs;
958-
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
959-
Context, Device, nullptr, nullptr, sizeof(LocalArgsInfo) * NumArgs,
960-
(void **)&Data->LocalArgs));
961-
uint32_t i = 0;
962-
for (auto [ArgIndex, ArgInfo] : KI.LocalArgs) {
963-
Data->LocalArgs[i++] = ArgInfo;
964-
getContext()->logger.debug(
965-
"local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex,
966-
ArgInfo.Size, ArgInfo.SizeWithRedZone);
967-
}
968-
}
969-
return UR_RESULT_SUCCESS;
970-
}
971-
972-
USMLaunchInfo::~USMLaunchInfo() {
965+
AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
973966
[[maybe_unused]] ur_result_t Result;
974-
if (Data) {
975-
auto Type = GetDeviceType(Context, Device);
976-
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
977-
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
978-
if (Data->PrivateShadowOffset) {
979-
ContextInfo->Stats.UpdateShadowFreed(
980-
Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
981-
1);
982-
Result = getContext()->urDdiTable.USM.pfnFree(
983-
Context, (void *)Data->PrivateShadowOffset);
984-
assert(Result == UR_RESULT_SUCCESS);
985-
}
986-
if (Data->LocalShadowOffset) {
987-
ContextInfo->Stats.UpdateShadowFreed(
988-
Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1);
989-
Result = getContext()->urDdiTable.USM.pfnFree(
990-
Context, (void *)Data->LocalShadowOffset);
991-
assert(Result == UR_RESULT_SUCCESS);
992-
}
967+
auto Type = GetDeviceType(Context, Device);
968+
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
969+
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
970+
if (Host.PrivateShadowOffset) {
971+
ContextInfo->Stats.UpdateShadowFreed(Host.PrivateShadowOffsetEnd -
972+
Host.PrivateShadowOffset + 1);
973+
Result = getContext()->urDdiTable.USM.pfnFree(
974+
Context, (void *)Host.PrivateShadowOffset);
975+
assert(Result == UR_RESULT_SUCCESS);
993976
}
994-
if (Data->LocalArgs) {
977+
if (Host.LocalShadowOffset) {
978+
ContextInfo->Stats.UpdateShadowFreed(Host.LocalShadowOffsetEnd -
979+
Host.LocalShadowOffset + 1);
995980
Result = getContext()->urDdiTable.USM.pfnFree(
996-
Context, (void *)Data->LocalArgs);
981+
Context, (void *)Host.LocalShadowOffset);
997982
assert(Result == UR_RESULT_SUCCESS);
998983
}
999-
Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data);
984+
}
985+
if (Host.LocalArgs) {
986+
Result = getContext()->urDdiTable.USM.pfnFree(Context,
987+
(void *)Host.LocalArgs);
988+
assert(Result == UR_RESULT_SUCCESS);
989+
}
990+
if (DevicePtr) {
991+
Result = getContext()->urDdiTable.USM.pfnFree(Context, DevicePtr);
1000992
assert(Result == UR_RESULT_SUCCESS);
1001993
}
994+
}
995+
996+
LaunchInfo::~LaunchInfo() {
997+
[[maybe_unused]] ur_result_t Result;
1002998
Result = getContext()->urDdiTable.Context.pfnRelease(Context);
1003999
assert(Result == UR_RESULT_SUCCESS);
10041000
Result = getContext()->urDdiTable.Device.pfnRelease(Device);

0 commit comments

Comments
 (0)