Skip to content

Commit 5f12146

Browse files
authored
Merge pull request #266 from ali-ramadhan/patch-3
Add AMD section to a couple of examples
2 parents 045fab2 + 58ddcd7 commit 5f12146

File tree

2 files changed

+49
-6
lines changed

2 files changed

+49
-6
lines changed

examples/memcopy.jl

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
using KernelAbstractions
2-
using CUDAKernels
32
using CUDA
3+
using CUDAKernels
4+
using AMDGPU
5+
using ROCKernels
46
using Test
57

68
@kernel function copy_kernel!(A, @Const(B))
@@ -34,3 +36,18 @@ if has_cuda_gpu()
3436
wait(event)
3537
@test A == B
3638
end
39+
40+
41+
# AMD GPU variant of the memcopy example, guarded by `has_rocm_gpu()`.
if has_rocm_gpu()

    # Instantiate `copy_kernel!` for `ROCDevice()` (256 is passed as the second
    # constructor argument -- presumably the workgroup size; confirm against the
    # KernelAbstractions docs) and launch it over `length(A)` indices.
    # Returns whatever the kernel launch returns; the caller below `wait`s on it.
    function mycopy!(A::ROCArray, B::ROCArray)
        @assert size(A) == size(B)
        kernel! = copy_kernel!(ROCDevice(), 256)
        return kernel!(A, B, ndrange=length(A))
    end

    # Destination starts as zeros, source as ones, so the final equality check
    # only passes if the kernel actually wrote into `A`.
    A = ROCArray(zeros(Float32, 1024))
    B = ROCArray(ones(Float32, 1024))
    event = mycopy!(A, B)
    wait(event)
    @test A == B
end

examples/naive_transpose.jl

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,17 @@
1-
using KernelAbstractions, CUDAKernels, Test, CUDA
1+
using KernelAbstractions
2+
using CUDA
3+
using CUDAKernels
4+
using AMDGPU
5+
using ROCKernels
6+
using Test
27

38
# When a CUDA GPU is reported, call `CUDA.allowscalar(false)` so scalar indexing
# of CUDA arrays is disallowed for the rest of the script.
has_cuda_gpu() && CUDA.allowscalar(false)
611

712
# Naive transpose kernel: reads the global index as a 2-tuple and stores the
# mirrored element of `a` into `b`, i.e. b[row, col] = a[col, row].
# `@inbounds` is used, so the launch range is assumed to be valid for both arrays.
@kernel function naive_transpose_kernel!(a, b)
    row, col = @index(Global, NTuple)
    @inbounds b[row, col] = a[col, row]
end
1116

1217
# create wrapper function to check inputs
@@ -16,11 +21,17 @@ function naive_transpose!(a, b)
1621
println("Matrix size mismatch!")
1722
return nothing
1823
end
24+
1925
if isa(a, Array)
20-
kernel! = naive_transpose_kernel!(CPU(),4)
26+
kernel! = naive_transpose_kernel!(CPU(), 4)
27+
elseif isa(a, CuArray)
28+
kernel! = naive_transpose_kernel!(CUDADevice(), 256)
29+
elseif isa(a, ROCArray)
30+
kernel! = naive_transpose_kernel!(ROCDevice(), 256)
2131
else
22-
kernel! = naive_transpose_kernel!(CUDADevice(),256)
32+
println("Unrecognized array type!")
2333
end
34+
2435
kernel!(a, b, ndrange=size(a))
2536
end
2637

@@ -49,3 +60,18 @@ if has_cuda_gpu()
4960

5061
@test a == transpose(b)
5162
end
63+
64+
65+
# AMD GPU variant of the transpose check, guarded by `has_rocm_gpu()`.
if has_rocm_gpu()
    # Device-side input built from `a`, plus a zero-filled res x res output buffer.
    d_a = ROCArray(a)
    d_b = ROCArray(zeros(Float32, res, res))

    # Launch the transpose and wait on the value it returns.
    ev = naive_transpose!(d_a, d_b)
    wait(ev)

    # Convert both matrices back to plain `Array`s before comparing.
    a = Array(d_a)
    b = Array(d_b)

    @test a == transpose(b)
end
77+

0 commit comments

Comments
 (0)