@@ -249,15 +249,11 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
249
249
250
250
ur_result_t AsanInterceptor::preLaunchKernel (ur_kernel_handle_t Kernel,
251
251
ur_queue_handle_t Queue,
252
- USMLaunchInfo &LaunchInfo) {
252
+ LaunchInfo &LaunchInfo) {
253
253
auto Context = GetContext (Queue);
254
254
auto Device = GetDevice (Queue);
255
255
auto ContextInfo = getContextInfo (Context);
256
256
auto DeviceInfo = getDeviceInfo (Device);
257
- auto KernelInfo = getKernelInfo (Kernel);
258
- assert (KernelInfo && " Kernel should be instrumented" );
259
-
260
- UR_CALL (LaunchInfo.updateKernelInfo (*KernelInfo.get ()));
261
257
262
258
ManagedQueue InternalQueue (Context, Device);
263
259
if (!InternalQueue) {
@@ -275,12 +271,14 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
275
271
276
272
ur_result_t AsanInterceptor::postLaunchKernel (ur_kernel_handle_t Kernel,
277
273
ur_queue_handle_t Queue,
278
- USMLaunchInfo &LaunchInfo) {
274
+ LaunchInfo &LaunchInfo) {
279
275
// FIXME: We must use a blocking operation here until urEventSetCallback is supported
280
276
auto Result = getContext ()->urDdiTable .Queue .pfnFinish (Queue);
281
277
278
+ UR_CALL (LaunchInfo.Data .syncFromDevice (Queue));
279
+
282
280
if (Result == UR_RESULT_SUCCESS) {
283
- for (const auto &Report : LaunchInfo.Data -> Report ) {
281
+ for (const auto &Report : LaunchInfo.Data . Host . Report ) {
284
282
if (!Report.Flag ) {
285
283
continue ;
286
284
}
@@ -685,7 +683,7 @@ AsanInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
685
683
ur_result_t AsanInterceptor::prepareLaunch (
686
684
std::shared_ptr<ContextInfo> &ContextInfo,
687
685
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
688
- ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) {
686
+ ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {
689
687
690
688
do {
691
689
auto KernelInfo = getKernelInfo (Kernel);
@@ -721,27 +719,20 @@ ur_result_t AsanInterceptor::prepareLaunch(
721
719
}
722
720
}
723
721
724
- // Set launch info argument
725
722
auto ArgNums = GetKernelNumArgs (Kernel);
723
+ // We must prepare all kernel args before calling
724
+ // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
725
+ // CPU device.
726
726
if (ArgNums) {
727
- getContext ()->logger .debug (
728
- " launch_info {} (numLocalArgs={}, localArgs={})" ,
729
- (void *)LaunchInfo.Data , LaunchInfo.Data ->NumLocalArgs ,
730
- (void *)LaunchInfo.Data ->LocalArgs );
731
727
ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
732
- Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data );
728
+ Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data . getDevicePtr () );
733
729
if (URes != UR_RESULT_SUCCESS) {
734
730
getContext ()->logger .error (" Failed to set launch info: {}" ,
735
731
URes);
736
732
return URes;
737
733
}
738
734
}
739
735
740
- LaunchInfo.Data ->GlobalShadowOffset = DeviceInfo->Shadow ->ShadowBegin ;
741
- LaunchInfo.Data ->GlobalShadowOffsetEnd = DeviceInfo->Shadow ->ShadowEnd ;
742
- LaunchInfo.Data ->DeviceTy = DeviceInfo->Type ;
743
- LaunchInfo.Data ->Debug = getOptions ().Debug ? 1 : 0 ;
744
-
745
736
if (LaunchInfo.LocalWorkSize .empty ()) {
746
737
LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
747
738
auto URes =
@@ -768,6 +759,14 @@ ur_result_t AsanInterceptor::prepareLaunch(
768
759
LocalWorkSize[Dim];
769
760
}
770
761
762
+ // Prepare asan runtime data
763
+ LaunchInfo.Data .Host .GlobalShadowOffset =
764
+ DeviceInfo->Shadow ->ShadowBegin ;
765
+ LaunchInfo.Data .Host .GlobalShadowOffsetEnd =
766
+ DeviceInfo->Shadow ->ShadowEnd ;
767
+ LaunchInfo.Data .Host .DeviceTy = DeviceInfo->Type ;
768
+ LaunchInfo.Data .Host .Debug = getOptions ().Debug ? 1 : 0 ;
769
+
771
770
auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle ,
772
771
Device = DeviceInfo->Handle ,
773
772
Queue](size_t Size , uptr &Ptr ) {
@@ -816,7 +815,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
816
815
817
816
if (EnqueueAllocateShadowMemory (
818
817
LocalShadowMemorySize,
819
- LaunchInfo.Data -> LocalShadowOffset ) !=
818
+ LaunchInfo.Data . Host . LocalShadowOffset ) !=
820
819
UR_RESULT_SUCCESS) {
821
820
getContext ()->logger .warning (
822
821
" Failed to allocate shadow memory for local "
@@ -827,25 +826,25 @@ ur_result_t AsanInterceptor::prepareLaunch(
827
826
" Skip checking local memory of kernel <{}>" ,
828
827
GetKernelName (Kernel));
829
828
} else {
830
- LaunchInfo.Data -> LocalShadowOffsetEnd =
831
- LaunchInfo.Data -> LocalShadowOffset +
829
+ LaunchInfo.Data . Host . LocalShadowOffsetEnd =
830
+ LaunchInfo.Data . Host . LocalShadowOffset +
832
831
LocalShadowMemorySize - 1 ;
833
832
834
833
ContextInfo->Stats .UpdateShadowMalloced (
835
834
LocalShadowMemorySize);
836
835
837
836
getContext ()->logger .info (
838
837
" ShadowMemory(Local, {} - {})" ,
839
- (void *)LaunchInfo.Data -> LocalShadowOffset ,
840
- (void *)LaunchInfo.Data -> LocalShadowOffsetEnd );
838
+ (void *)LaunchInfo.Data . Host . LocalShadowOffset ,
839
+ (void *)LaunchInfo.Data . Host . LocalShadowOffsetEnd );
841
840
}
842
841
}
843
842
}
844
843
845
844
// Write shadow memory offset for private memory
846
845
if (getOptions ().DetectPrivates ) {
847
846
if (DeviceInfo->Type == DeviceType::CPU) {
848
- LaunchInfo.Data -> PrivateShadowOffset =
847
+ LaunchInfo.Data . Host . PrivateShadowOffset =
849
848
DeviceInfo->Shadow ->ShadowBegin ;
850
849
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
851
850
DeviceInfo->Type == DeviceType::GPU_DG2) {
@@ -858,7 +857,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
858
857
859
858
if (EnqueueAllocateShadowMemory (
860
859
PrivateShadowMemorySize,
861
- LaunchInfo.Data -> PrivateShadowOffset ) !=
860
+ LaunchInfo.Data . Host . PrivateShadowOffset ) !=
862
861
UR_RESULT_SUCCESS) {
863
862
getContext ()->logger .warning (
864
863
" Failed to allocate shadow memory for private "
@@ -869,20 +868,41 @@ ur_result_t AsanInterceptor::prepareLaunch(
869
868
" Skip checking private memory of kernel <{}>" ,
870
869
GetKernelName (Kernel));
871
870
} else {
872
- LaunchInfo.Data -> PrivateShadowOffsetEnd =
873
- LaunchInfo.Data -> PrivateShadowOffset +
871
+ LaunchInfo.Data . Host . PrivateShadowOffsetEnd =
872
+ LaunchInfo.Data . Host . PrivateShadowOffset +
874
873
PrivateShadowMemorySize - 1 ;
875
874
876
875
ContextInfo->Stats .UpdateShadowMalloced (
877
876
PrivateShadowMemorySize);
878
877
879
878
getContext ()->logger .info (
880
879
" ShadowMemory(Private, {} - {})" ,
881
- (void *)LaunchInfo.Data -> PrivateShadowOffset ,
882
- (void *)LaunchInfo.Data -> PrivateShadowOffsetEnd );
880
+ (void *)LaunchInfo.Data . Host . PrivateShadowOffset ,
881
+ (void *)LaunchInfo.Data . Host . PrivateShadowOffsetEnd );
883
882
}
884
883
}
885
884
}
885
+
886
+ // Write local arguments info
887
+ if (!KernelInfo->LocalArgs .empty ()) {
888
+ std::vector<LocalArgsInfo> LocalArgsInfo;
889
+ for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs ) {
890
+ LocalArgsInfo.push_back (ArgInfo);
891
+ getContext ()->logger .debug (
892
+ " local_args (argIndex={}, size={}, sizeWithRZ={})" ,
893
+ ArgIndex, ArgInfo.Size , ArgInfo.SizeWithRedZone );
894
+ }
895
+ UR_CALL (LaunchInfo.Data .importLocalArgsInfo (Queue, LocalArgsInfo));
896
+ }
897
+
898
+ // Sync ASan runtime data to the device side
899
+ UR_CALL (LaunchInfo.Data .syncToDevice (Queue));
900
+
901
+ getContext ()->logger .debug (
902
+ " launch_info {} (numLocalArgs={}, localArgs={})" ,
903
+ (void *)LaunchInfo.Data .getDevicePtr (),
904
+ LaunchInfo.Data .Host .NumLocalArgs ,
905
+ (void *)LaunchInfo.Data .Host .LocalArgs );
886
906
} while (false );
887
907
888
908
return UR_RESULT_SUCCESS;
@@ -942,63 +962,39 @@ ContextInfo::~ContextInfo() {
942
962
}
943
963
}
944
964
945
- ur_result_t USMLaunchInfo::initialize () {
946
- UR_CALL (getContext ()->urDdiTable .Context .pfnRetain (Context));
947
- UR_CALL (getContext ()->urDdiTable .Device .pfnRetain (Device));
948
- UR_CALL (getContext ()->urDdiTable .USM .pfnSharedAlloc (
949
- Context, Device, nullptr , nullptr , sizeof (LaunchInfo), (void **)&Data));
950
- *Data = LaunchInfo{};
951
- return UR_RESULT_SUCCESS;
952
- }
953
-
954
- ur_result_t USMLaunchInfo::updateKernelInfo (const KernelInfo &KI) {
955
- auto NumArgs = KI.LocalArgs .size ();
956
- if (NumArgs) {
957
- Data->NumLocalArgs = NumArgs;
958
- UR_CALL (getContext ()->urDdiTable .USM .pfnSharedAlloc (
959
- Context, Device, nullptr , nullptr , sizeof (LocalArgsInfo) * NumArgs,
960
- (void **)&Data->LocalArgs ));
961
- uint32_t i = 0 ;
962
- for (auto [ArgIndex, ArgInfo] : KI.LocalArgs ) {
963
- Data->LocalArgs [i++] = ArgInfo;
964
- getContext ()->logger .debug (
965
- " local_args (argIndex={}, size={}, sizeWithRZ={})" , ArgIndex,
966
- ArgInfo.Size , ArgInfo.SizeWithRedZone );
967
- }
968
- }
969
- return UR_RESULT_SUCCESS;
970
- }
971
-
972
- USMLaunchInfo::~USMLaunchInfo () {
965
+ AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper () {
973
966
[[maybe_unused]] ur_result_t Result;
974
- if (Data) {
975
- auto Type = GetDeviceType (Context, Device);
976
- auto ContextInfo = getAsanInterceptor ()->getContextInfo (Context);
977
- if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
978
- if (Data->PrivateShadowOffset ) {
979
- ContextInfo->Stats .UpdateShadowFreed (
980
- Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
981
- 1 );
982
- Result = getContext ()->urDdiTable .USM .pfnFree (
983
- Context, (void *)Data->PrivateShadowOffset );
984
- assert (Result == UR_RESULT_SUCCESS);
985
- }
986
- if (Data->LocalShadowOffset ) {
987
- ContextInfo->Stats .UpdateShadowFreed (
988
- Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1 );
989
- Result = getContext ()->urDdiTable .USM .pfnFree (
990
- Context, (void *)Data->LocalShadowOffset );
991
- assert (Result == UR_RESULT_SUCCESS);
992
- }
967
+ auto Type = GetDeviceType (Context, Device);
968
+ auto ContextInfo = getAsanInterceptor ()->getContextInfo (Context);
969
+ if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
970
+ if (Host.PrivateShadowOffset ) {
971
+ ContextInfo->Stats .UpdateShadowFreed (Host.PrivateShadowOffsetEnd -
972
+ Host.PrivateShadowOffset + 1 );
973
+ Result = getContext ()->urDdiTable .USM .pfnFree (
974
+ Context, (void *)Host.PrivateShadowOffset );
975
+ assert (Result == UR_RESULT_SUCCESS);
993
976
}
994
- if (Data->LocalArgs ) {
977
+ if (Host.LocalShadowOffset ) {
978
+ ContextInfo->Stats .UpdateShadowFreed (Host.LocalShadowOffsetEnd -
979
+ Host.LocalShadowOffset + 1 );
995
980
Result = getContext ()->urDdiTable .USM .pfnFree (
996
- Context, (void *)Data-> LocalArgs );
981
+ Context, (void *)Host. LocalShadowOffset );
997
982
assert (Result == UR_RESULT_SUCCESS);
998
983
}
999
- Result = getContext ()->urDdiTable .USM .pfnFree (Context, (void *)Data);
984
+ }
985
+ if (Host.LocalArgs ) {
986
+ Result = getContext ()->urDdiTable .USM .pfnFree (Context,
987
+ (void *)Host.LocalArgs );
988
+ assert (Result == UR_RESULT_SUCCESS);
989
+ }
990
+ if (DevicePtr) {
991
+ Result = getContext ()->urDdiTable .USM .pfnFree (Context, DevicePtr);
1000
992
assert (Result == UR_RESULT_SUCCESS);
1001
993
}
994
+ }
995
+
996
+ LaunchInfo::~LaunchInfo () {
997
+ [[maybe_unused]] ur_result_t Result;
1002
998
Result = getContext ()->urDdiTable .Context .pfnRelease (Context);
1003
999
assert (Result == UR_RESULT_SUCCESS);
1004
1000
Result = getContext ()->urDdiTable .Device .pfnRelease (Device);
0 commit comments