@@ -187,6 +187,15 @@ def benchmarks(self) -> list[Benchmark]:
187
187
MemcpyExecute (self , 400 , 1 , 102400 , 10 , 1 , 1 , 1 ),
188
188
MemcpyExecute (self , 400 , 1 , 102400 , 10 , 0 , 1 , 1 ),
189
189
MemcpyExecute (self , 4096 , 4 , 1024 , 10 , 0 , 1 , 0 ),
190
+ UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 , "Both" ),
191
+ UsmMemoryAllocation (self , RUNTIMES .UR , "Device" , 256 * 1024 , "Both" ),
192
+ UsmBatchMemoryAllocation (self , RUNTIMES .UR , "Device" , 128 , 256 , "Both" ),
193
+ UsmBatchMemoryAllocation (
194
+ self , RUNTIMES .UR , "Device" , 128 , 16 * 1024 , "Both"
195
+ ),
196
+ UsmBatchMemoryAllocation (
197
+ self , RUNTIMES .UR , "Device" , 128 , 128 * 1024 , "Both"
198
+ ),
190
199
]
191
200
192
201
return benches
@@ -716,3 +725,101 @@ def bin_args(self) -> list[str]:
716
725
f"--ioq={ self .ioq } " ,
717
726
f"--ctrBasedEvents={ self .ctrBasedEvents } " ,
718
727
]
728
+
729
+
730
class UsmMemoryAllocation(ComputeBenchmark):
    """Benchmark entry for the ``UsmMemoryAllocation`` test.

    Stores the runtime, USM memory placement, allocation size and measure
    mode, and derives the benchmark name, tags, description and command-line
    arguments from them.

    NOTE(review): the f-string literals in this class are kept character for
    character as they were found, including the spacing around replacement
    fields and before closing quotes. Confirm that spacing is intended before
    normalizing it, since it ends up in the emitted names and arguments.
    """

    def __init__(
        self,
        bench,
        runtime: RUNTIMES,
        usm_memory_placement: str,
        size: int,
        measure_mode: str,
    ) -> None:
        """Record the benchmark parameters, then initialize the base class.

        Args:
            bench: Forwarded unchanged to ``ComputeBenchmark.__init__``.
            runtime: Runtime whose ``value`` selects the benchmark binary name.
            usm_memory_placement: USM placement, emitted as ``--type``.
            size: Allocation size, emitted as ``--size``; the description
                string reports it in bytes.
            measure_mode: ``"Allocate"``, ``"Free"``, or any other value,
                which the description treats as timing both operations.
        """
        # Attributes are assigned before the base initializer runs, exactly
        # as in the original, in case the base class reads them.
        self.runtime = runtime
        self.usm_memory_placement = usm_memory_placement
        self.size = size
        self.measure_mode = measure_mode
        super().__init__(
            bench, f"api_overhead_benchmark_{ runtime .value } ", "UsmMemoryAllocation"
        )

    def get_tags(self):
        """Return the runtime's tag name followed by the fixed tags."""
        return [runtime_to_tag_name(self.runtime), "micro", "latency", "memory"]

    def name(self) -> str:
        """Return the benchmark name, which encodes every parameter."""
        return (
            f"api_overhead_benchmark_{ self .runtime .value } UsmMemoryAllocation "
            f"usmMemoryPlacement:{ self .usm_memory_placement } size:{ self .size } measureMode:{ self .measure_mode } "
        )

    def explicit_group(self) -> str:
        """Return the fixed group name shared by all instances of this class."""
        # Plain literal: the original used an f-string with no placeholders.
        return "UsmMemoryAllocation"

    def description(self) -> str:
        """Return a human-readable summary of what this configuration times."""
        # Any measure mode other than "Allocate" or "Free" falls back to the
        # "both are timed" wording.
        what_is_measured = "Both memory allocation and memory free are timed"
        if self.measure_mode == "Allocate":
            what_is_measured = "Only memory allocation is timed"
        elif self.measure_mode == "Free":
            what_is_measured = "Only memory free is timed"
        return (
            f"Measures memory allocation overhead by allocating { self .size } bytes of "
            f"usm { self .usm_memory_placement } memory and free'ing it immediately. "
            f"{ what_is_measured } . "
        )

    def bin_args(self) -> list[str]:
        """Return the command-line arguments for this configuration."""
        return [
            f"--type={ self .usm_memory_placement } ",
            f"--size={ self .size } ",
            f"--measureMode={ self .measure_mode } ",
            "--iterations=10000",
        ]
773
+
774
+
775
class UsmBatchMemoryAllocation(ComputeBenchmark):
    """Benchmark entry for the ``UsmBatchMemoryAllocation`` test.

    Stores the runtime, USM memory placement, allocation count, allocation
    size and measure mode, and derives the benchmark name, tags, description
    and command-line arguments from them.

    NOTE(review): the f-string literals in this class are kept character for
    character as they were found, including the spacing around replacement
    fields and before closing quotes. Confirm that spacing is intended before
    normalizing it, since it ends up in the emitted names and arguments.
    """

    def __init__(
        self,
        bench,
        runtime: RUNTIMES,
        usm_memory_placement: str,
        allocation_count: int,
        size: int,
        measure_mode: str,
    ) -> None:
        """Record the benchmark parameters, then initialize the base class.

        Args:
            bench: Forwarded unchanged to ``ComputeBenchmark.__init__``.
            runtime: Runtime whose ``value`` selects the benchmark binary name.
            usm_memory_placement: USM placement, emitted as ``--type``.
            allocation_count: Number of allocations per batch, emitted as
                ``--allocationCount``.
            size: Allocation size, emitted as ``--size``; the description
                string reports it in bytes.
            measure_mode: ``"Allocate"``, ``"Free"``, or any other value,
                which the description treats as timing both operations.
        """
        # Attributes are assigned before the base initializer runs, exactly
        # as in the original, in case the base class reads them.
        self.runtime = runtime
        self.usm_memory_placement = usm_memory_placement
        self.allocation_count = allocation_count
        self.size = size
        self.measure_mode = measure_mode
        super().__init__(
            bench, f"api_overhead_benchmark_{ runtime .value } ", "UsmBatchMemoryAllocation"
        )

    def get_tags(self):
        """Return the runtime's tag name followed by the fixed tags."""
        return [runtime_to_tag_name(self.runtime), "micro", "latency", "memory"]

    def name(self) -> str:
        """Return the benchmark name, which encodes every parameter."""
        return (
            f"api_overhead_benchmark_{ self .runtime .value } UsmBatchMemoryAllocation "
            f"usmMemoryPlacement:{ self .usm_memory_placement } allocationCount:{ self .allocation_count } size:{ self .size } measureMode:{ self .measure_mode } "
        )

    def explicit_group(self) -> str:
        """Return the fixed group name shared by all instances of this class."""
        # Plain literal: the original used an f-string with no placeholders.
        return "UsmBatchMemoryAllocation"

    def description(self) -> str:
        """Return a human-readable summary of what this configuration times."""
        # Any measure mode other than "Allocate" or "Free" falls back to the
        # "both are timed" wording.
        what_is_measured = "Both memory allocation and memory free are timed"
        if self.measure_mode == "Allocate":
            what_is_measured = "Only memory allocation is timed"
        elif self.measure_mode == "Free":
            what_is_measured = "Only memory free is timed"
        return (
            f"Measures memory allocation overhead by allocating { self .size } bytes of "
            f"usm { self .usm_memory_placement } memory { self .allocation_count } times, then free'ing it all at once. "
            f"{ what_is_measured } . "
        )

    def bin_args(self) -> list[str]:
        """Return the command-line arguments for this configuration."""
        return [
            f"--type={ self .usm_memory_placement } ",
            f"--allocationCount={ self .allocation_count } ",
            f"--size={ self .size } ",
            f"--measureMode={ self .measure_mode } ",
            "--iterations=1000",
        ]
0 commit comments