Skip to content

Commit 1497d41

Browse files
authored
Adding a few more examples (#12)
adding simple mat mult and return kernels
1 parent 7b2dc94 commit 1497d41

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed

examples/matmul.jl

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
using KernelAbstractions, Test, CUDAapi
2+
if CUDAapi.has_cuda_gpu()
3+
using CuArrays
4+
CuArrays.allowscalar(false)
5+
end
6+
7+
# Simple kernel for matrix multiplication: each work-item computes one
# output element c[i, j] as the dot product of row i of `a` and column j
# of `b`. The global Cartesian index supplies (i, j).
@kernel function matmul!(a, b, c)
    if size(a, 2) != size(b, 1)
        # here, we need a CPU / GPU generic print statement, like...
        # CUDAnative.@cuprintf("Matrix size mismatch!")
        return nothing
    end
    cI = @index(Global, Cartesian)

    # Accumulate in the output's element type so the loop is type-stable;
    # starting from the Int literal 0 widened the accumulator's type on
    # the first float addition.
    tmp_sum = zero(eltype(c))

    for i = 1:size(a, 2)
        tmp_sum += a[cI[1], i] * b[i, cI[2]]
    end

    c[cI] = tmp_sum
end
25+
26+
# Creating a wrapper kernel for launching with error checks.
#
# Validates operand shapes and dispatches matmul! to the appropriate
# backend (CPU for plain Arrays, CUDA otherwise). Returns the launch
# event, which the caller is expected to `wait` on.
#
# Throws `DimensionMismatch` on incompatible shapes — the original
# print-and-return-`nothing` path made the caller's subsequent `wait(ev)`
# fail with an unrelated MethodError on `nothing`.
function launch_matmul!(a, b, c)
    if size(a, 2) != size(b, 1)
        throw(DimensionMismatch("inner dimensions of a and b do not match"))
    end
    # ndrange below is size(c); a wrongly-sized c would index a/b out of
    # bounds inside the kernel, so validate it up front.
    if size(c) != (size(a, 1), size(b, 2))
        throw(DimensionMismatch("c must have size (size(a, 1), size(b, 2))"))
    end
    if isa(a, Array)
        kernel! = matmul!(CPU(), 4)
    else
        kernel! = matmul!(CUDA(), 256)
    end
    kernel!(a, b, c, ndrange=size(c))
end
39+
40+
# Smoke-test matmul!: run it on the CPU (and on the GPU when CUDA hardware
# is present) and compare against Julia's built-in `*`.
function check()
    lhs = rand(256, 123)
    rhs = rand(123, 45)
    out = zeros(256, 45)

    # CPU run — the launch returns an event; block until the kernel is done.
    event = launch_matmul!(lhs, rhs, out)
    wait(event)

    println("Testing CPU matrix multiplication...")
    @test isapprox(lhs * rhs, out)

    # GPU run, only when a CUDA device is available.
    if has_cuda_gpu()
        lhs_gpu = CuArray(lhs)
        rhs_gpu = CuArray(rhs)
        out_gpu = CuArray(out)

        event = launch_matmul!(lhs_gpu, rhs_gpu, out_gpu)
        wait(event)
        expected = lhs * rhs

        println("Testing GPU matrix multiplication...")
        @test isapprox(Array(out_gpu), expected)
    end
end

check()

examples/naive_transpose.jl

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
using KernelAbstractions, Test, CUDAapi
2+
if CUDAapi.has_cuda_gpu()
3+
using CuArrays
4+
CuArrays.allowscalar(false)
5+
end
6+
7+
# Element-for-element copy kernel: work-item `idx` moves a[idx] into b[idx].
# NOTE(review): the name `copy!` collides with the function exported from
# Base — consider renaming (e.g. `copy_kernel!`) to avoid confusion.
@kernel function copy!(a, b)
    idx = @index(Global)
    @inbounds b[idx] = a[idx]
end
11+
12+
# Naive (uncoalesced) transpose kernel: each work-item writes one element,
# b[row, col] = a[col, row].
@kernel function naive_transpose!(a, b)
    pos = @index(Global, Cartesian)
    row, col = Tuple(pos)
    @inbounds b[row, col] = a[col, row]
end
17+
18+
# Wrapper that validates shapes and dispatches the copy! kernel to the
# appropriate backend (CPU for plain Arrays, CUDA otherwise). Returns the
# launch event for the caller to `wait` on.
#
# Throws `DimensionMismatch` on mismatched sizes — the original printed a
# message and returned `nothing`, which made the caller's `wait(ev)` fail
# with an unrelated MethodError.
function launch_copy!(a, b)
    if size(a) != size(b)
        throw(DimensionMismatch("a and b must have the same size"))
    end
    if isa(a, Array)
        kernel! = copy!(CPU(), 4)
    else
        kernel! = copy!(CUDA(), 1024)
    end
    kernel!(a, b, ndrange=size(a))
end
31+
32+
# Wrapper that validates shapes and dispatches the naive_transpose! kernel
# to the appropriate backend (CPU for plain Arrays, CUDA otherwise).
# Returns the launch event for the caller to `wait` on.
#
# Throws `DimensionMismatch` when b is not shaped as the transpose of a —
# the original printed and returned `nothing`, which made the caller's
# `wait(ev)` fail with an unrelated MethodError.
function launch_naive_transpose!(a, b)
    if size(a, 1) != size(b, 2) || size(a, 2) != size(b, 1)
        throw(DimensionMismatch("b must have the transposed shape of a"))
    end
    if isa(a, Array)
        kernel! = naive_transpose!(CPU(), 4)
    else
        kernel! = naive_transpose!(CUDA(), 256)
    end
    kernel!(a, b, ndrange=size(a))
end
45+
46+
# Exercise the copy! and naive_transpose! kernels on the CPU and, when
# CUDA hardware is available, on the GPU, verifying each result.
function main()

    # resolution of grid will be res*res
    res = 1024

    # creating initial arrays on CPU and GPU
    a = round.(rand(Float32, (res, res)) * 100)
    b = zeros(Float32, res, res)

    # beginning CPU tests: first copy a into b and verify the copy before
    # the transpose below overwrites b (the original never checked it)
    ev = launch_copy!(a, b)
    wait(ev)
    @test a == b

    ev = launch_naive_transpose!(a, b)
    wait(ev)

    # (a stray "CPU transpose time is:" print was removed here — no timing
    # was ever measured or printed)
    println("Testing CPU transpose...")
    @test a == transpose(b)

    # beginning GPU tests
    if has_cuda_gpu()
        d_a = CuArray(a)
        d_b = CuArray(zeros(Float32, res, res))

        ev = launch_copy!(d_a, d_b)
        wait(ev)
        @test Array(d_a) == Array(d_b)

        ev = launch_naive_transpose!(d_a, d_b)
        wait(ev)

        a = Array(d_a)
        b = Array(d_b)

        println("Testing GPU transpose...")
        @test a == transpose(b)
    end

    return nothing
end

main()
88+

0 commit comments

Comments
 (0)