1
- using KernelAbstractions, CUDAKernels, Test, CUDA
1
+ using KernelAbstractions
2
+ using CUDA
3
+ using CUDAKernels
4
+ using AMDGPU
5
+ using ROCKernels
6
+ using Test
2
7
3
8
# When a CUDA-capable GPU is detected, turn off scalar indexing of GPU arrays
# so that accidental element-by-element access is not silently allowed.
has_cuda_gpu() && CUDA.allowscalar(false)
6
11
7
12
# Naive transpose kernel: each work-item handles one global 2-D index and
# writes element (row, col) of `b` from element (col, row) of `a`.
#
# NOTE(review): bounds checks are disabled via `@inbounds`; this assumes the
# launch `ndrange` keeps both (row, col) in `b` and (col, row) in `a` valid --
# confirm against the caller.
@kernel function naive_transpose_kernel!(a, b)
    row, col = @index(Global, NTuple)
    @inbounds b[row, col] = a[col, row]
end
11
16
12
17
# Create a wrapper function that checks the inputs and selects a backend
@@ -16,11 +21,17 @@ function naive_transpose!(a, b)
16
21
println (" Matrix size mismatch!" )
17
22
return nothing
18
23
end
24
+
19
25
if isa (a, Array)
20
- kernel! = naive_transpose_kernel! (CPU (),4 )
26
+ kernel! = naive_transpose_kernel! (CPU (), 4 )
27
+ elseif isa (a, CuArray)
28
+ kernel! = naive_transpose_kernel! (CUDADevice (), 256 )
29
+ elseif isa (a, ROCArray)
30
+ kernel! = naive_transpose_kernel! (ROCDevice (), 256 )
21
31
else
22
- kernel! = naive_transpose_kernel! ( CUDADevice (), 256 )
32
+ println ( " Unrecognized array type! " )
23
33
end
34
+
24
35
kernel! (a, b, ndrange= size (a))
25
36
end
26
37
@@ -49,3 +60,26 @@ if has_cuda_gpu()
49
60
50
61
@test a == transpose (b)
51
62
end
63
+
64
# Return `true` if any agent reported by `AMDGPU.get_agents()` has type `:gpu`,
# and `false` otherwise (including when no agents are reported).
function has_rocm_gpu()
    return any(agent -> agent.type == :gpu, AMDGPU.get_agents())
end
72
+
73
# Run the transpose check on an AMD GPU when one is available.
# NOTE(review): `a` and `res` are defined earlier in the file, outside this
# block -- presumably the host input matrix and its side length; confirm.
if has_rocm_gpu()
    # Device copy of the input and a zero-filled Float32 device output.
    d_a = ROCArray(a)
    d_b = ROCArray(zeros(Float32, res, res))

    # Launch the transpose and block until the returned event completes.
    ev = naive_transpose!(d_a, d_b)
    wait(ev)

    # Copy both matrices back to host arrays for comparison.
    a = Array(d_a)
    b = Array(d_b)

    @test a == transpose(b)
end
85
+
0 commit comments