Skip to content

Commit 87e4daf

Browse files
committed
Add the changes needed for the GPUArrays transition to KernelAbstractions (KA)
1 parent e872db7 commit 87e4daf

File tree

1 file changed

+1
-45
lines changed

1 file changed

+1
-45
lines changed

src/gpuarrays.jl

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,4 @@
1-
"""
    ROCArrayBackend

Field-less backend type, a subtype of `AbstractGPUBackend`. `GPUArrays.gpu_call` dispatches
on it, and `GPUArrays.backend` returns an instance of it for `ROCArray` types.
"""
struct ROCArrayBackend <: AbstractGPUBackend
end
2-
3-
"""
    ROCKernelContext

Field-less context type, a subtype of `AbstractKernelContext`. An instance is passed as the
first argument to every kernel launched through `GPUArrays.gpu_call`, and the device-side
`GPUArrays` methods defined in this file dispatch on it.
"""
struct ROCKernelContext <: AbstractKernelContext
end
4-
5-
"""
    GPUArrays.gpu_call(::ROCArrayBackend, f, args, threads::Int, blocks::Int; name)

Launch `f` through `@roc`, using `blocks` as the `gridsize` and `threads` as the
`groupsize`. The kernel receives a fresh `ROCKernelContext()` followed by the splatted
`args`. `name` is forwarded unchanged to `@roc` and has no default, so callers must
supply it.
"""
@inline function GPUArrays.gpu_call(
    ::ROCArrayBackend,
    f,
    args,
    threads::Int,
    blocks::Int;
    name::Maybe{String},
)
    # Keep the space-separated `key=value` form: that is how `@roc` receives its options.
    return @roc gridsize=blocks groupsize=threads name=name f(ROCKernelContext(), args...)
end
10-
11-
# indexing
12-
13-
# Generate the GPUArrays index queries for `ROCKernelContext`: each one calls the paired
# AMDGPU function and returns the `x` field of its result.
for (gpuarrays_name, amdgpu_name) in (
    :blockidx => :blockIdx,
    :blockdim => :blockDim,
    :threadidx => :threadIdx,
    :griddim => :gridGroupDim,
)
    @eval begin
        @inline GPUArrays.$gpuarrays_name(::ROCKernelContext) = AMDGPU.$amdgpu_name().x
    end
end
21-
22-
# math
23-
24-
# Each GPUArrays math function taking a `ROCKernelContext` simply forwards its argument
# to the function of the same name that is in scope in this module.
for fname in (:cos, :sin, :sqrt, :log)
    @eval @inline GPUArrays.$fname(::ROCKernelContext, x) = $fname(x)
end
28-
29-
# memory
30-
31-
"""
    GPUArrays.LocalMemory(::ROCKernelContext, ::Type{T}, ::Val{dims}, ::Val{id})

Obtain a pointer from `AMDGPU.Device.alloc_special`, called with `Val{id}()`, `T`,
`Val{AMDGPU.AS.Local}()` and `Val{prod(dims)}()`, and wrap it in a `ROCDeviceArray` built
from `dims` and that pointer.
"""
@inline function GPUArrays.LocalMemory(
    ::ROCKernelContext, ::Type{T}, ::Val{dims}, ::Val{id},
) where {T,dims,id}
    # `dims` is a type parameter, so the product is lifted back into a `Val`.
    total = Val{prod(dims)}()
    local_ptr = AMDGPU.Device.alloc_special(Val{id}(), T, Val{AMDGPU.AS.Local}(), total)
    return ROCDeviceArray(dims, local_ptr)
end
35-
36-
# synchronization
37-
38-
"""
    GPUArrays.synchronize_threads(::ROCKernelContext)

Call `sync_workgroup()` and return `nothing`.
"""
@inline GPUArrays.synchronize_threads(::ROCKernelContext) = (sync_workgroup(); nothing)
42-
43-
"""
    GPUArrays.device(x::ROCArray)

Return the `device` field of the object obtained by dereferencing `x.buf`.
"""
function GPUArrays.device(x::ROCArray)
    buffer = x.buf[]
    return buffer.device
end
44-
45-
"""
    GPUArrays.backend(::Type{<:ROCArray})

Return a `ROCArrayBackend()` instance for any `ROCArray` type.
"""
function GPUArrays.backend(::Type{<:ROCArray})
    return ROCArrayBackend()
end
1+
import KernelAbstractions
462

473
function GPUArrays.derive(
484
::Type{T}, x::ROCArray, dims::Dims{N}, offset::Int,

0 commit comments

Comments
 (0)