|
1 |
| -struct ROCArrayBackend <: AbstractGPUBackend end |
| 1 | +import KernelAbstractions |
| 2 | +import KernelAbstractions: Backend |
2 | 3 |
|
3 |
| -struct ROCKernelContext <: AbstractKernelContext end |
4 |
| - |
5 |
| -@inline function GPUArrays.gpu_call( |
6 |
| - ::ROCArrayBackend, f, args, threads::Int, blocks::Int; name::Maybe{String}, |
7 |
| -) |
8 |
| - @roc gridsize=blocks groupsize=threads name=name f(ROCKernelContext(), args...) |
9 |
| -end |
10 |
| - |
11 |
| -# indexing |
12 |
| - |
13 |
| -for (f, froc) in ( |
14 |
| - (:blockidx, :blockIdx), |
15 |
| - (:blockdim, :blockDim), |
16 |
| - (:threadidx, :threadIdx), |
17 |
| - (:griddim, :gridGroupDim) |
18 |
| -) |
19 |
| - @eval @inline GPUArrays.$f(::ROCKernelContext) = AMDGPU.$froc().x |
20 |
| -end |
21 |
| - |
22 |
| -# math |
23 |
| - |
24 |
| -@inline GPUArrays.cos(::ROCKernelContext, x) = cos(x) |
25 |
| -@inline GPUArrays.sin(::ROCKernelContext, x) = sin(x) |
26 |
| -@inline GPUArrays.sqrt(::ROCKernelContext, x) = sqrt(x) |
27 |
| -@inline GPUArrays.log(::ROCKernelContext, x) = log(x) |
28 |
| - |
29 |
| -# memory |
30 |
| - |
31 |
| -@inline function GPUArrays.LocalMemory(::ROCKernelContext, ::Type{T}, ::Val{dims}, ::Val{id}) where {T,dims,id} |
32 |
| - ptr = AMDGPU.Device.alloc_special(Val{id}(), T, Val{AMDGPU.AS.Local}(), Val{prod(dims)}()) |
33 |
| - ROCDeviceArray(dims, ptr) |
34 |
| -end |
35 |
| - |
36 |
| -# synchronization |
37 |
| - |
38 |
| -@inline function GPUArrays.synchronize_threads(::ROCKernelContext) |
39 |
| - sync_workgroup() |
40 |
| - return |
41 |
| -end |
42 |
| - |
43 |
| -GPUArrays.device(x::ROCArray) = x.buf[].device |
44 |
| - |
45 |
| -GPUArrays.backend(::Type{<:ROCArray}) = ROCArrayBackend() |
| 4 | +struct ROCArrayBackend <: Backend end |
46 | 5 |
|
47 | 6 | function GPUArrays.derive(
|
48 | 7 | ::Type{T}, x::ROCArray, dims::Dims{N}, offset::Int,
|
|
0 commit comments