1
1
# GPUArrays.jl interface
2
2
3
+ import KernelAbstractions
4
+ import KernelAbstractions: Backend
3
5
4
6
#
5
7
# Device functionality
8
10
9
11
# # execution
10
12
11
- struct CuArrayBackend <: AbstractGPUBackend end
12
-
13
- struct CuKernelContext <: AbstractKernelContext end
13
+ struct CuArrayBackend <: Backend end
14
14
15
15
@inline function GPUArrays. launch_heuristic (:: CuArrayBackend , f:: F , args:: Vararg{Any,N} ;
16
16
elements:: Int , elements_per_thread:: Int ) where {F,N}
@@ -24,39 +24,3 @@ struct CuKernelContext <: AbstractKernelContext end
24
24
launch_configuration (kernel. fun; max_threads= 256 )
25
25
end
26
26
end
27
-
28
- @inline function GPUArrays. gpu_call (:: CuArrayBackend , f:: F , args:: TT , threads:: Int ,
29
- blocks:: Int ; name:: Union{String,Nothing} ) where {F,TT}
30
- @cuda threads blocks name f (CuKernelContext (), args... )
31
- end
32
-
33
-
34
- # # on-device
35
-
36
- # indexing
37
-
38
- GPUArrays. blockidx (ctx:: CuKernelContext ) = blockIdx (). x
39
- GPUArrays. blockdim (ctx:: CuKernelContext ) = blockDim (). x
40
- GPUArrays. threadidx (ctx:: CuKernelContext ) = threadIdx (). x
41
- GPUArrays. griddim (ctx:: CuKernelContext ) = gridDim (). x
42
-
43
- # memory
44
-
45
- @inline function GPUArrays. LocalMemory (:: CuKernelContext , :: Type{T} , :: Val{dims} , :: Val{id}
46
- ) where {T, dims, id}
47
- ptr = CUDA. _shmem (Val (id), T, Val (prod (dims)))
48
- ptr = reinterpret (LLVMPtr{T, AS. Shared}, ptr)
49
- CuDeviceArray {T,length(dims),AS.Shared} (ptr, dims)
50
- end
51
-
52
- # synchronization
53
-
54
- @inline GPUArrays. synchronize_threads (:: CuKernelContext ) = sync_threads ()
55
-
56
-
57
-
58
- #
59
- # Host abstractions
60
- #
61
-
62
- GPUArrays. backend (:: Type{<:CuArray} ) = CuArrayBackend ()
0 commit comments