@@ -714,16 +714,21 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
714
714
EnqueueWriteGlobal (kSPIR_DeviceType , &DeviceInfo->Type ,
715
715
sizeof (DeviceInfo->Type ));
716
716
717
- if (DeviceInfo->Type == DeviceType::CPU) {
718
- break ;
719
- }
720
-
721
717
if (LaunchInfo.LocalWorkSize .empty ()) {
722
- LaunchInfo.LocalWorkSize .reserve (3 );
723
- // FIXME: This is W/A until urKernelSuggestGroupSize is added
724
- LaunchInfo.LocalWorkSize [0 ] = 1 ;
725
- LaunchInfo.LocalWorkSize [1 ] = 1 ;
726
- LaunchInfo.LocalWorkSize [2 ] = 1 ;
718
+ LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
719
+ auto URes = context.urDdiTable .Kernel .pfnGetSuggestedLocalWorkSize (
720
+ Kernel, Queue, LaunchInfo.WorkDim , LaunchInfo.GlobalWorkOffset ,
721
+ LaunchInfo.GlobalWorkSize , LaunchInfo.LocalWorkSize .data ());
722
+ if (URes != UR_RESULT_SUCCESS) {
723
+ if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
724
+ return URes;
725
+ }
726
+ // If urKernelGetSuggestedLocalWorkSize is not supported by the driver, we fall back
727
+ // to an inefficient implementation
728
+ for (size_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
729
+ LaunchInfo.LocalWorkSize [Dim] = 1 ;
730
+ }
731
+ }
727
732
}
728
733
729
734
const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize .data ();
@@ -733,56 +738,109 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
733
738
LocalWorkSize[Dim];
734
739
}
735
740
736
- auto EnqueueAllocateDevice = [Context, &DeviceInfo, Queue,
737
- NumWG](size_t Size, uptr &Ptr) {
741
+ auto EnqueueAllocateShadowMemory = [Context, &DeviceInfo,
742
+ Queue](size_t Size, uptr &Ptr) {
743
+ void *Allocated = nullptr ;
738
744
auto URes = context.urDdiTable .USM .pfnDeviceAlloc (
739
745
Context, DeviceInfo->Handle , nullptr , nullptr , Size,
740
- ( void **)&Ptr );
746
+ &Allocated );
741
747
if (URes != UR_RESULT_SUCCESS) {
742
- context.logger .error (
743
- " Failed to allocate shadow memory for local memory: {}" ,
744
- URes);
745
- context.logger .error (
746
- " Maybe the number of workgroup ({}) too large" , NumWG);
747
748
return URes;
748
749
}
749
- // Initialize shadow memory of local memory
750
- URes = urEnqueueUSMSet (Queue, (void *)Ptr, 0 , Size);
751
- if (URes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
752
- context.logger .error (
753
- " Failed to allocate shadow memory for local memory: {}" ,
754
- URes);
755
- context.logger .error (
756
- " Maybe the number of workgroup ({}) too large" , NumWG);
757
- return URes;
750
+ // Initialize shadow memory
751
+ URes = urEnqueueUSMSet (Queue, Allocated, 0 , Size);
752
+ if (URes != UR_RESULT_SUCCESS) {
753
+ [[maybe_unused]] auto URes =
754
+ context.urDdiTable .USM .pfnFree (Context, Allocated);
755
+ assert (URes == UR_RESULT_SUCCESS &&
756
+ " urUSMFree failed at allocating shadow memory" );
757
+ Allocated = nullptr ;
758
758
}
759
+ Ptr = (uptr)Allocated;
759
760
return URes;
760
761
};
761
762
763
+ auto LocalMemoryUsage =
764
+ GetKernelLocalMemorySize (Kernel, DeviceInfo->Handle );
765
+ auto PrivateMemoryUsage =
766
+ GetKernelPrivateMemorySize (Kernel, DeviceInfo->Handle );
767
+
768
+ context.logger .info (" KernelInfo {} (LocalMemory={}, PrivateMemory={})" ,
769
+ (void *)Kernel, LocalMemoryUsage,
770
+ PrivateMemoryUsage);
771
+
762
772
// Write shadow memory offset for local memory
763
773
if (Options ().DetectLocals ) {
764
774
// CPU needn't this
765
775
if (DeviceInfo->Type == DeviceType::GPU_PVC) {
766
- size_t LocalMemorySize = GetLocalMemorySize (DeviceInfo->Handle );
767
- size_t LocalShadowMemorySize =
776
+ const size_t LocalMemorySize =
777
+ GetDeviceLocalMemorySize (DeviceInfo->Handle );
778
+ const size_t LocalShadowMemorySize =
768
779
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
769
780
770
781
context.logger .debug (
771
- " LocalMemoryInfo (WorkGroup={}, LocalMemorySize={}, "
782
+ " LocalMemory (WorkGroup={}, LocalMemorySize={}, "
772
783
" LocalShadowMemorySize={})" ,
773
784
NumWG, LocalMemorySize, LocalShadowMemorySize);
774
785
775
- UR_CALL (EnqueueAllocateDevice (
776
- LocalShadowMemorySize, LaunchInfo.Data ->LocalShadowOffset ));
777
-
778
- LaunchInfo.Data ->LocalShadowOffsetEnd =
779
- LaunchInfo.Data ->LocalShadowOffset + LocalShadowMemorySize -
780
- 1 ;
786
+ if (EnqueueAllocateShadowMemory (
787
+ LocalShadowMemorySize,
788
+ LaunchInfo.Data ->LocalShadowOffset ) !=
789
+ UR_RESULT_SUCCESS) {
790
+ context.logger .warning (
791
+ " Failed to allocate shadow memory for local "
792
+ " memory, maybe the number of workgroup ({}) is too "
793
+ " large" ,
794
+ NumWG);
795
+ context.logger .warning (
796
+ " Skip checking local memory of kernel <{}>" ,
797
+ GetKernelName (Kernel));
798
+ } else {
799
+ LaunchInfo.Data ->LocalShadowOffsetEnd =
800
+ LaunchInfo.Data ->LocalShadowOffset +
801
+ LocalShadowMemorySize - 1 ;
802
+
803
+ context.logger .info (
804
+ " ShadowMemory(Local, {} - {})" ,
805
+ (void *)LaunchInfo.Data ->LocalShadowOffset ,
806
+ (void *)LaunchInfo.Data ->LocalShadowOffsetEnd );
807
+ }
808
+ }
809
+ }
781
810
782
- context.logger .info (
783
- " ShadowMemory(Local, {} - {})" ,
784
- (void *)LaunchInfo.Data ->LocalShadowOffset ,
785
- (void *)LaunchInfo.Data ->LocalShadowOffsetEnd );
811
+ // Write shadow memory offset for private memory
812
+ if (Options ().DetectPrivates ) {
813
+ if (DeviceInfo->Type == DeviceType::CPU) {
814
+ LaunchInfo.Data ->PrivateShadowOffset = DeviceInfo->ShadowOffset ;
815
+ } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
816
+ const size_t PrivateShadowMemorySize =
817
+ (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
818
+
819
+ context.logger .debug (" PrivateMemory(WorkGroup={}, "
820
+ " PrivateShadowMemorySize={})" ,
821
+ NumWG, PrivateShadowMemorySize);
822
+
823
+ if (EnqueueAllocateShadowMemory (
824
+ PrivateShadowMemorySize,
825
+ LaunchInfo.Data ->PrivateShadowOffset ) !=
826
+ UR_RESULT_SUCCESS) {
827
+ context.logger .warning (
828
+ " Failed to allocate shadow memory for private "
829
+ " memory, maybe the number of workgroup ({}) is too "
830
+ " large" ,
831
+ NumWG);
832
+ context.logger .warning (
833
+ " Skip checking private memory of kernel <{}>" ,
834
+ GetKernelName (Kernel));
835
+ } else {
836
+ LaunchInfo.Data ->PrivateShadowOffsetEnd =
837
+ LaunchInfo.Data ->PrivateShadowOffset +
838
+ PrivateShadowMemorySize - 1 ;
839
+ context.logger .info (
840
+ " ShadowMemory(Private, {} - {})" ,
841
+ (void *)LaunchInfo.Data ->PrivateShadowOffset ,
842
+ (void *)LaunchInfo.Data ->PrivateShadowOffsetEnd );
843
+ }
786
844
}
787
845
}
788
846
} while (false );
0 commit comments