Skip to content

Commit c2417a1

Browse files
committed
Add AMD section to naive_transpose.jl example
1 parent d61f6a9 commit c2417a1

File tree

1 file changed

+39
-5
lines changed

1 file changed

+39
-5
lines changed

examples/naive_transpose.jl

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
using KernelAbstractions, CUDAKernels, Test, CUDA
1+
using KernelAbstractions
2+
using CUDA
3+
using CUDAKernels
4+
using AMDGPU
5+
using ROCKernels
6+
using Test
27

38
if has_cuda_gpu()
49
CUDA.allowscalar(false)
510
end
611

712
# Transpose kernel: each work-item copies a single element of `a` into the
# transposed position of `b`. Intended to be launched with
# `ndrange = size(a)` (see the `naive_transpose!` wrapper).
@kernel function naive_transpose_kernel!(a, b)
    # Global 2-D index of this work-item within the launch ndrange.
    idx = @index(Global, NTuple)
    row, col = idx
    # NOTE(review): `@inbounds` assumes the wrapper has already verified that
    # `a` and `b` have matching (transposed) sizes — confirm against caller.
    @inbounds b[row, col] = a[col, row]
end
1116

1217
# create wrapper function to check inputs
@@ -16,11 +21,17 @@ function naive_transpose!(a, b)
1621
println("Matrix size mismatch!")
1722
return nothing
1823
end
24+
1925
if isa(a, Array)
20-
kernel! = naive_transpose_kernel!(CPU(),4)
26+
kernel! = naive_transpose_kernel!(CPU(), 4)
27+
elseif isa(a, CuArray)
28+
kernel! = naive_transpose_kernel!(CUDADevice(), 256)
29+
elseif isa(a, ROCArray)
30+
kernel! = naive_transpose_kernel!(ROCDevice(), 256)
2131
else
22-
kernel! = naive_transpose_kernel!(CUDADevice(),256)
32+
println("Unrecognized array type!")
2333
end
34+
2435
kernel!(a, b, ndrange=size(a))
2536
end
2637

@@ -49,3 +60,26 @@ if has_cuda_gpu()
4960

5061
@test a == transpose(b)
5162
end
63+
64+
# Return `true` when at least one agent reported by AMDGPU is a GPU,
# `false` otherwise (including when no agents are present).
# NOTE(review): relies on `AMDGPU.get_agents()`; newer AMDGPU.jl releases
# expose `AMDGPU.devices()` / `AMDGPU.functional()` instead — confirm the
# package version this example targets.
function has_rocm_gpu()
    return any(agent -> agent.type == :gpu, AMDGPU.get_agents())
end
72+
73+
# Run the transpose round-trip on an AMD GPU when one is present, mirroring
# the CUDA section of this script.
if has_rocm_gpu()
    # NOTE(review): assumes the host matrix `a` and size `res` are defined
    # earlier in this file — confirm when viewing the full script.
    d_a = a |> ROCArray
    d_b = ROCArray(zeros(Float32, res, res))

    # Launch the transpose and block until the kernel event completes.
    event = naive_transpose!(d_a, d_b)
    wait(event)

    # Copy the device buffers back to the host and verify the round trip.
    a = Array(d_a)
    b = Array(d_b)

    @test a == transpose(b)
end
85+

0 commit comments

Comments (0)