Commit 7644f62

switch name to CUDADevice from CUDAGPU

1 parent 349865a · commit 7644f62

15 files changed: +47 -47 lines changed
docs/src/index.md (2 additions, 2 deletions)

@@ -55,13 +55,13 @@ all(A .== 2.0)
 1. Functions inside kernels are forcefully inlined, except when marked with `@noinline`.
 2. Floating-point multiplication, addition, subtraction are marked contractable.

-## Important differences to CUDA
+## Important differences to CUDA.jl

 1. The kernels are automatically bounds-checked against either the dynamic or statically
    provided `ndrange`.
 2. Functions like `Base.sin` are mapped to `CUDA.sin`.

-## Important differences to GPUifyLoops
+## Important differences to GPUifyLoops.jl

 1. `@scratch` has been renamed to `@private`, and the semantics have changed. Instead
    of denoting how many dimensions are implicit on the GPU, you only ever provide the
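For reference, this is how the renamed backend type is used end to end. The snippet below is a minimal, hypothetical sketch assuming the post-rename API shown in this commit's examples (a `@kernel` definition, a backend instance plus workgroup size, and an `ndrange` at call time); `add_one_kernel!` and `add_one!` are illustrative names, not part of the repository.

using KernelAbstractions
using CUDA  # only needed for the CUDADevice() path

# Hypothetical kernel: add one to every element of A.
@kernel function add_one_kernel!(A)
    I = @index(Global)
    @inbounds A[I] += one(eltype(A))
end

function add_one!(A)
    # Pick the backend from the array type, mirroring examples/matmul.jl.
    kernel! = isa(A, Array) ? add_one_kernel!(CPU(), 4) : add_one_kernel!(CUDADevice(), 256)
    event = kernel!(A, ndrange=length(A))  # launching returns an event
    wait(event)                            # block until the kernel has finished
    return A
end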

examples/matmul.jl (1 addition, 1 deletion)

@@ -26,7 +26,7 @@ function matmul!(a, b, c)
     if isa(a, Array)
         kernel! = matmul_kernel!(CPU(),4)
     else
-        kernel! = matmul_kernel!(CUDAGPU(),256)
+        kernel! = matmul_kernel!(CUDADevice(),256)
     end
     kernel!(a, b, c, ndrange=size(c))
 end

examples/memcopy.jl (1 addition, 1 deletion)

@@ -24,7 +24,7 @@ if has_cuda_gpu()

     function mycopy!(A::CuArray, B::CuArray)
         @assert size(A) == size(B)
-        copy_kernel!(CUDAGPU(), 256)(A, B, ndrange=length(A))
+        copy_kernel!(CUDADevice(), 256)(A, B, ndrange=length(A))
     end

     A = CuArray{Float32}(undef, 1024)

examples/memcopy_static.jl (1 addition, 1 deletion)

@@ -23,7 +23,7 @@ if has_cuda_gpu()

     function mycopy_static!(A::CuArray, B::CuArray)
         @assert size(A) == size(B)
-        kernel = copy_kernel!(CUDAGPU(), 32, size(A)) # if size(A) varies this will cause recompilation
+        kernel = copy_kernel!(CUDADevice(), 32, size(A)) # if size(A) varies this will cause recompilation
         kernel(A, B, ndrange=size(A))
     end
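The comment on the changed line is the important detail here: passing `size(A)` when the kernel is instantiated specializes (and therefore recompiles) the kernel for that specific range, while the form in examples/memcopy.jl fixes only the backend and workgroup size and leaves the range fully dynamic. A hedged side-by-side sketch, assuming a `copy_kernel!` defined as in these examples:

# Dynamic ndrange: one compiled kernel, the range is supplied at call time only.
copy_kernel!(CUDADevice(), 256)(A, B, ndrange=length(A))

# Static ndrange: specialized on size(A); recompiles if size(A) changes.
kernel = copy_kernel!(CUDADevice(), 32, size(A))
kernel(A, B, ndrange=size(A))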

examples/mpi.jl (1 addition, 1 deletion)

@@ -10,7 +10,7 @@ end

 using MPI

-device(A) = typeof(A) <: Array ? CPU() : CUDAGPU()
+device(A) = typeof(A) <: Array ? CPU() : CUDADevice()

 function mpiyield()
     MPI.Iprobe(MPI.MPI_ANY_SOURCE, MPI.MPI_ANY_TAG, MPI.COMM_WORLD)

examples/naive_transpose.jl (1 addition, 1 deletion)

@@ -19,7 +19,7 @@ function naive_transpose!(a, b)
     if isa(a, Array)
         kernel! = naive_transpose_kernel!(CPU(),4)
     else
-        kernel! = naive_transpose_kernel!(CUDAGPU(),256)
+        kernel! = naive_transpose_kernel!(CUDADevice(),256)
     end
     kernel!(a, b, ndrange=size(a))
 end

examples/performance.jl (3 additions, 3 deletions)

@@ -19,7 +19,7 @@ const nreps = 1
 NVTX.@range "Naive transpose ($block_dim, $block_dim)" let
     a = CuArray(rand(T, shape))
     b = similar(a, shape[2], shape[1])
-    kernel! = transpose_kernel_naive!(CUDAGPU(), (block_dim, block_dim), size(b))
+    kernel! = transpose_kernel_naive!(CUDADevice(), (block_dim, block_dim), size(b))

     event = kernel!(b, a)
     wait(event)
@@ -35,7 +35,7 @@ end
 NVTX.@range "Naive transpose ($(block_dim^2), 1)" let
     a = CuArray(rand(T, shape))
     b = similar(a, shape[2], shape[1])
-    kernel! = transpose_kernel_naive!(CUDAGPU(), (block_dim*block_dim, 1), size(b))
+    kernel! = transpose_kernel_naive!(CUDADevice(), (block_dim*block_dim, 1), size(b))

     event = kernel!(b, a)
     wait(event)
@@ -51,7 +51,7 @@ end
 NVTX.@range "Naive transpose (1, $(block_dim^2))" let
     a = CuArray(rand(T, shape))
     b = similar(a, shape[2], shape[1])
-    kernel! = transpose_kernel_naive!(CUDAGPU(), (1, block_dim*block_dim), size(b))
+    kernel! = transpose_kernel_naive!(CUDADevice(), (1, block_dim*block_dim), size(b))

     event = kernel!(b, a)
     wait(event)

src/KernelAbstractions.jl (2 additions, 2 deletions)

@@ -2,7 +2,7 @@ module KernelAbstractions

 export @kernel
 export @Const, @localmem, @private, @uniform, @synchronize, @index, groupsize, @print
-export Device, GPU, CPU, CUDAGPU, Event, MultiEvent, NoneEvent
+export Device, GPU, CPU, CUDADevice, Event, MultiEvent, NoneEvent
 export async_copy!


@@ -303,7 +303,7 @@ abstract type Device end
 abstract type GPU <: Device end

 struct CPU <: Device end
-struct CUDAGPU <: GPU end
+struct CUDADevice <: GPU end
 # struct AMD <: GPU end
 # struct Intel <: GPU end
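The second hunk is the type hierarchy behind the rename: `CUDADevice <: GPU <: Device` alongside `CPU <: Device`, with commented-out placeholders for other vendors. A small, hypothetical sketch of why the abstract layers matter (the helper name is illustrative, not part of the package): user code can specialize on "any GPU backend" without naming CUDADevice directly.

# Hypothetical helper: choose a workgroup size per backend family.
default_workgroupsize(::CPU) = 4
default_workgroupsize(::GPU) = 256  # covers CUDADevice and any future GPU <: Device

# Used with a kernel such as copy_kernel! from the examples above:
launch_copy!(dev::Device, A, B) =
    copy_kernel!(dev, default_workgroupsize(dev))(A, B, ndrange=length(A))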

src/backends/cuda.jl (12 additions, 12 deletions)

@@ -49,7 +49,7 @@ end
 failed(::CudaEvent) = false
 isdone(ev::CudaEvent) = CUDA.query(ev.event)

-function Event(::CUDAGPU)
+function Event(::CUDADevice)
     stream = CUDA.CuDefaultStream()
     event = CUDA.CuEvent(CUDA.EVENT_DISABLE_TIMING)
     CUDA.record(event, stream)
@@ -69,25 +69,25 @@ function wait(::CPU, ev::CudaEvent, progress=yield)
 end

 # Use this to synchronize between computation using the CuDefaultStream
-wait(::CUDAGPU, ev::CudaEvent, progress=nothing, stream=CUDA.CuDefaultStream()) = CUDA.wait(ev.event, stream)
-wait(::CUDAGPU, ev::NoneEvent, progress=nothing, stream=nothing) = nothing
+wait(::CUDADevice, ev::CudaEvent, progress=nothing, stream=CUDA.CuDefaultStream()) = CUDA.wait(ev.event, stream)
+wait(::CUDADevice, ev::NoneEvent, progress=nothing, stream=nothing) = nothing

-function wait(::CUDAGPU, ev::MultiEvent, progress=nothing, stream=CUDA.CuDefaultStream())
+function wait(::CUDADevice, ev::MultiEvent, progress=nothing, stream=CUDA.CuDefaultStream())
     dependencies = collect(ev.events)
     cudadeps = filter(d->d isa CudaEvent, dependencies)
     otherdeps = filter(d->!(d isa CudaEvent), dependencies)
     for event in cudadeps
         CUDA.wait(event.event, stream)
     end
     for event in otherdeps
-        wait(CUDAGPU(), event, progress, stream)
+        wait(CUDADevice(), event, progress, stream)
     end
 end

 include("cusynchronization.jl")
 import .CuSynchronization: unsafe_volatile_load, unsafe_volatile_store!

-function wait(::CUDAGPU, ev::CPUEvent, progress=nothing, stream=nothing)
+function wait(::CUDADevice, ev::CPUEvent, progress=nothing, stream=nothing)
     error("""
     Waiting on the GPU for an CPU event to finish is currently not supported.
     We have encountered deadlocks arising, due to interactions with the CUDA
@@ -106,7 +106,7 @@ end
 # TODO:
 # - In case of an error we should probably also kill the waiting GPU code.
 unsafe_wait(dev::Device, ev, progress=nothing) = wait(dev, ev, progress)
-function unsafe_wait(::CUDAGPU, ev::CPUEvent, progress=nothing, stream=CUDA.CuDefaultStream())
+function unsafe_wait(::CUDADevice, ev::CPUEvent, progress=nothing, stream=CUDA.CuDefaultStream())
     buf = CUDA.Mem.alloc(CUDA.Mem.HostBuffer, sizeof(UInt32), CUDA.Mem.HOSTREGISTER_DEVICEMAP)
     unsafe_store!(convert(Ptr{UInt32}, buf), UInt32(0))
     # TODO: Switch to `@spawn` when CUDA.jl is thread-safe
@@ -150,12 +150,12 @@ function __pin!(a)
     return nothing
 end

-function async_copy!(::CUDAGPU, A, B; dependencies=nothing, progress=yield)
+function async_copy!(::CUDADevice, A, B; dependencies=nothing, progress=yield)
     A isa Array && __pin!(A)
     B isa Array && __pin!(B)

     stream = next_stream()
-    wait(CUDAGPU(), MultiEvent(dependencies), progress, stream)
+    wait(CUDADevice(), MultiEvent(dependencies), progress, stream)
     event = CUDA.CuEvent(CUDA.EVENT_DISABLE_TIMING)
     GC.@preserve A B begin
         destptr = pointer(A)
@@ -173,7 +173,7 @@ end
 ###
 # Kernel launch
 ###
-function (obj::Kernel{CUDAGPU})(args...; ndrange=nothing, dependencies=nothing, workgroupsize=nothing, progress=yield)
+function (obj::Kernel{CUDADevice})(args...; ndrange=nothing, dependencies=nothing, workgroupsize=nothing, progress=yield)
     if ndrange isa Integer
         ndrange = (ndrange,)
     end
@@ -203,7 +203,7 @@ function (obj::Kernel{CUDAGPU})(args...; ndrange=nothing, dependencies=nothing,
     end

     stream = next_stream()
-    wait(CUDAGPU(), MultiEvent(dependencies), progress, stream)
+    wait(CUDADevice(), MultiEvent(dependencies), progress, stream)

     ctx = mkcontext(obj, ndrange, iterspace)
     # Launch kernel
@@ -218,7 +218,7 @@ end

 Cassette.@context CUDACtx

-function mkcontext(kernel::Kernel{CUDAGPU}, _ndrange, iterspace)
+function mkcontext(kernel::Kernel{CUDADevice}, _ndrange, iterspace)
     metadata = CompilerMetadata{ndrange(kernel), DynamicCheck}(_ndrange, iterspace)
     Cassette.disablehooks(CUDACtx(pass = CompilerPass, metadata=metadata))
 end
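Taken together, these hunks rename the device parameter throughout the event and launch machinery: `Event(CUDADevice())` records a marker on the default stream, the `wait(CUDADevice(), ev, ...)` methods make a stream wait on prior work, and both `async_copy!` and kernel launches take a `dependencies` keyword that is wrapped in a `MultiEvent`. A hedged usage sketch of that chain, assuming a CUDA GPU, CuArrays `A`, `B`, `C` of equal length, and a `copy_kernel!` like the one in the examples:

ev0 = Event(CUDADevice())                  # capture work already queued on the default stream
ev1 = copy_kernel!(CUDADevice(), 256)(B, A, ndrange=length(B), dependencies=(ev0,))
ev2 = async_copy!(CUDADevice(), C, B, dependencies=(ev1,))  # copy B into C after the kernel
wait(ev2)                                  # CPU-side wait for the whole chain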

test/async_copy.jl (2 additions, 2 deletions)

@@ -20,6 +20,6 @@ end
 M = 1024

 if has_cuda_gpu()
-    copy_test(CUDAGPU(), CuArray, M)
+    copy_test(CUDADevice(), CuArray, M)
 end
-copy_test(CPU(), Array, M)
+copy_test(CPU(), Array, M)
