32
32
33
33
hipSuccess = 0
34
34
35
+
35
36
def hip_check (call_result ):
37
+ """helper function to check return values of hip calls"""
36
38
err = call_result [0 ]
37
39
result = call_result [1 :]
38
40
if len (result ) == 1 :
@@ -41,6 +43,7 @@ def hip_check(call_result):
41
43
raise RuntimeError (str (err ))
42
44
return result
43
45
46
+
44
47
class HipFunctions (GPUBackend ):
45
48
"""Class that groups the HIP functions and maintains state about the device."""
46
49
@@ -59,7 +62,9 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
59
62
:type iterations: int
60
63
"""
61
64
if not hip or not hiprtc :
62
- raise ImportError ("Unable to import HIP Python, check https://kerneltuner.github.io/kernel_tuner/stable/install.html#hip-and-hip-python." )
65
+ raise ImportError (
66
+ "Unable to import HIP Python, check https://kerneltuner.github.io/kernel_tuner/stable/install.html#hip-and-hip-python."
67
+ )
63
68
64
69
# embedded in try block to be able to generate documentation
65
70
# and run tests without HIP Python installed
@@ -69,7 +74,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
69
74
props = hip .hipDeviceProp_t ()
70
75
hip_check (hip .hipGetDeviceProperties (props , device ))
71
76
72
- self .name = props .name .decode (' utf-8' )
77
+ self .name = props .name .decode (" utf-8" )
73
78
self .max_threads = props .maxThreadsPerBlock
74
79
self .device = device
75
80
self .compiler_options = compiler_options or []
@@ -81,7 +86,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
81
86
env ["compiler_options" ] = compiler_options
82
87
self .env = env
83
88
84
- # Create stream and events
89
+ # Create stream and events
85
90
self .stream = hip_check (hip .hipStreamCreate ())
86
91
self .start = hip_check (hip .hipEventCreate ())
87
92
self .end = hip_check (hip .hipEventCreate ())
@@ -108,40 +113,34 @@ def ready_argument_list(self, arguments):
108
113
"""
109
114
logging .debug ("HipFunction ready_argument_list called" )
110
115
prepared_args = []
111
-
116
+
112
117
for arg in arguments :
113
118
dtype_str = str (arg .dtype )
114
-
119
+
115
120
# Handle numpy arrays
116
121
if isinstance (arg , np .ndarray ):
117
122
if dtype_str in dtype_map .keys ():
118
123
# Allocate device memory
119
124
device_ptr = hip_check (hip .hipMalloc (arg .nbytes ))
120
-
125
+
121
126
# Copy data to device using hipMemcpy
122
- hip_check (hip .hipMemcpy (
123
- device_ptr ,
124
- arg ,
125
- arg .nbytes ,
126
- hip .hipMemcpyKind .hipMemcpyHostToDevice
127
- ))
128
-
127
+ hip_check (hip .hipMemcpy (device_ptr , arg , arg .nbytes , hip .hipMemcpyKind .hipMemcpyHostToDevice ))
128
+
129
129
prepared_args .append (device_ptr )
130
130
else :
131
131
raise TypeError (f"Unknown dtype { dtype_str } for ndarray" )
132
-
132
+
133
133
# Handle numpy scalar types
134
134
elif isinstance (arg , np .generic ):
135
135
# Convert numpy scalar to corresponding ctypes
136
136
ctype_arg = dtype_map [dtype_str ](arg )
137
137
prepared_args .append (ctype_arg )
138
-
138
+
139
139
else :
140
140
raise ValueError (f"Invalid argument type { type (arg )} , { arg } " )
141
141
142
142
return prepared_args
143
143
144
-
145
144
def compile (self , kernel_instance ):
146
145
"""Call the HIP compiler to compile the kernel, return the function.
147
146
@@ -159,28 +158,22 @@ def compile(self, kernel_instance):
159
158
kernel_name = kernel_instance .name
160
159
if 'extern "C"' not in kernel_string :
161
160
kernel_string = 'extern "C" {\n ' + kernel_string + "\n }"
162
-
161
+
163
162
# Create program
164
- prog = hip_check (hiprtc .hiprtcCreateProgram (
165
- kernel_string .encode (),
166
- kernel_name .encode (),
167
- 0 ,
168
- [],
169
- []
170
- ))
163
+ prog = hip_check (hiprtc .hiprtcCreateProgram (kernel_string .encode (), kernel_name .encode (), 0 , [], []))
171
164
172
165
try :
173
166
# Get device properties
174
167
props = hip .hipDeviceProp_t ()
175
168
hip_check (hip .hipGetDeviceProperties (props , 0 ))
176
-
169
+
177
170
# Setup compilation options
178
171
arch = props .gcnArchName
179
172
cflags = [b"--offload-arch=" + arch ]
180
173
cflags .extend ([opt .encode () if isinstance (opt , str ) else opt for opt in self .compiler_options ])
181
174
182
175
# Compile program
183
- err , = hiprtc .hiprtcCompileProgram (prog , len (cflags ), cflags )
176
+ ( err ,) = hiprtc .hiprtcCompileProgram (prog , len (cflags ), cflags )
184
177
if err != hiprtc .hiprtcResult .HIPRTC_SUCCESS :
185
178
# Get compilation log if there's an error
186
179
log_size = hip_check (hiprtc .hiprtcGetProgramLogSize (prog ))
@@ -208,19 +201,19 @@ def compile(self, kernel_instance):
208
201
def start_event(self):
    """Record the event that marks the start of a measurement.

    The ``self.start`` event is recorded on ``self.stream`` and the
    returned status is passed through ``hip_check``.
    """
    logging.debug("HipFunction start_event called")

    # Keep the raw HIP return value separate so the checked call reads clearly.
    record_status = hip.hipEventRecord(self.start, self.stream)
    hip_check(record_status)
213
206
214
207
def stop_event(self):
    """Record the event that marks the end of a measurement.

    The ``self.end`` event is recorded on ``self.stream`` and the
    returned status is passed through ``hip_check``.
    """
    logging.debug("HipFunction stop_event called")

    # Keep the raw HIP return value separate so the checked call reads clearly.
    record_status = hip.hipEventRecord(self.end, self.stream)
    hip_check(record_status)
219
212
220
213
def kernel_finished (self ):
221
214
"""Returns True if the kernel has finished, False otherwise."""
222
215
logging .debug ("HipFunction kernel_finished called" )
223
-
216
+
224
217
# ROCm HIP returns (hipError_t, bool) for hipEventQuery
225
218
status = hip .hipEventQuery (self .end )
226
219
if status [0 ] == hip .hipError_t .hipSuccess :
@@ -233,7 +226,7 @@ def kernel_finished(self):
233
226
def synchronize(self):
    """Halt execution until the device has finished its tasks.

    Calls ``hip.hipDeviceSynchronize`` and passes the returned status
    through ``hip_check``.
    """
    logging.debug("HipFunction synchronize called")

    sync_status = hip.hipDeviceSynchronize()
    hip_check(sync_status)
238
231
239
232
def run_kernel (self , func , gpu_args , threads , grid , stream = None ):
@@ -242,7 +235,7 @@ def run_kernel(self, func, gpu_args, threads, grid, stream=None):
242
235
:param func: A HIP kernel compiled for this specific kernel configuration
243
236
:type func: hipFunction_t
244
237
245
- :param gpu_args: List of arguments to pass to the kernel. Can be DeviceArray
238
+ :param gpu_args: List of arguments to pass to the kernel. Can be DeviceArray
246
239
objects or ctypes values
247
240
:type gpu_args: list
248
241
@@ -272,7 +265,7 @@ def run_kernel(self, func, gpu_args, threads, grid, stream=None):
272
265
sharedMemBytes = self .smem_size ,
273
266
stream = stream ,
274
267
kernelParams = None ,
275
- extra = tuple (gpu_args )
268
+ extra = tuple (gpu_args ),
276
269
)
277
270
)
278
271
@@ -303,12 +296,7 @@ def memcpy_dtoh(self, dest, src):
303
296
"""
304
297
logging .debug ("HipFunction memcpy_dtoh called" )
305
298
306
- hip_check (hip .hipMemcpy (
307
- dest ,
308
- src ,
309
- dest .nbytes ,
310
- hip .hipMemcpyKind .hipMemcpyDeviceToHost
311
- ))
299
+ hip_check (hip .hipMemcpy (dest , src , dest .nbytes , hip .hipMemcpyKind .hipMemcpyDeviceToHost ))
312
300
313
301
def memcpy_htod (self , dest , src ):
314
302
"""Perform a host to device memory copy.
@@ -321,12 +309,7 @@ def memcpy_htod(self, dest, src):
321
309
"""
322
310
logging .debug ("HipFunction memcpy_htod called" )
323
311
324
- hip_check (hip .hipMemcpy (
325
- dest ,
326
- src ,
327
- src .nbytes ,
328
- hip .hipMemcpyKind .hipMemcpyHostToDevice
329
- ))
312
+ hip_check (hip .hipMemcpy (dest , src , src .nbytes , hip .hipMemcpyKind .hipMemcpyHostToDevice ))
330
313
331
314
def copy_constant_memory_args (self , cmem_args ):
332
315
"""Adds constant memory arguments to the most recently compiled module.
@@ -343,18 +326,10 @@ def copy_constant_memory_args(self, cmem_args):
343
326
# Iterate over dictionary
344
327
for symbol_name , data in cmem_args .items ():
345
328
# Get symbol pointer and size using hipModuleGetGlobal
346
- dptr , _ = hip_check (hip .hipModuleGetGlobal (
347
- self .current_module ,
348
- symbol_name .encode ()
349
- ))
329
+ dptr , _ = hip_check (hip .hipModuleGetGlobal (self .current_module , symbol_name .encode ()))
350
330
351
331
# Copy data to the global memory location
352
- hip_check (hip .hipMemcpy (
353
- dptr ,
354
- data ,
355
- data .nbytes ,
356
- hip .hipMemcpyKind .hipMemcpyHostToDevice
357
- ))
332
+ hip_check (hip .hipMemcpy (dptr , data , data .nbytes , hip .hipMemcpyKind .hipMemcpyHostToDevice ))
358
333
359
334
def copy_shared_memory_args (self , smem_args ):
360
335
"""Add shared memory arguments to the kernel."""
0 commit comments