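Mimic CUDA in GPUArrays.launch_heuristic: take the kernel object `obj`, derive ndrange from elements / elements_per_thread, and compile `obj.f`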

leios · leios · commit 56b5b9726444 · 2024-07-22T23:52:33.000+02:00
diff --git a/src/gpuarrays.jl b/src/gpuarrays.jl
@@ -10,15 +10,16 @@ import KernelAbstractions: Backend
 
 ## execution
 
-@inline function GPUArrays.launch_heuristic(::oneAPIBackend, f::F, args::Vararg{Any,N};
-                                             elements::Int, elements_per_thread::Int) where {F,N}
-    ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, nothing,
+@inline function GPUArrays.launch_heuristic(::oneAPIBackend, obj::O, args::Vararg{Any,N};
+                                             elements::Int, elements_per_thread::Int) where {O,N}
+    ndrange = ceil(Int, elements / elements_per_thread)
+    ndrange, workgroupsize, iterspace, dynamic = KA.launch_config(obj, ndrange,
                                                                   nothing)
 
     # this might not be the final context, since we may tune the workgroupsize
     ctx = KA.mkcontext(obj, ndrange, iterspace)
 
-    kernel = @oneapi launch=false f(ctx, args...)
+    kernel = @oneapi launch=false obj.f(ctx, args...)
 
     items = launch_configuration(kernel)
     # XXX: how many groups is a good number? the API doesn't tell us.