Skip to content

Detect device-side exceptions on the host. #342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/cl/cmdqueue.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ end

# Call `clFinish` on the queue and, once that returns, check the device-side exception
# flags so that a kernel failure is raised on the host. Returns the queue itself.
function finish(q::CmdQueue)
    clFinish(q)
    OpenCL.check_exceptions()
    return q
end

Expand Down
1 change: 1 addition & 0 deletions src/OpenCL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ include("array.jl")

# compiler implementation
include("compiler/compilation.jl")
include("compiler/exceptions.jl")
include("compiler/execution.jl")
include("compiler/reflection.jl")

Expand Down
2 changes: 2 additions & 0 deletions src/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ GPUCompiler.runtime_module(::CompilerJob{<:Any,OpenCLCompilerParams}) = OpenCL
# Method table view for OpenCL compiler jobs: a `StackedMethodTable` built at `job.world`
# from this package's `method_table` and `SPIRVIntrinsics.method_table`.
function GPUCompiler.method_table_view(job::OpenCLCompilerJob)
    return GPUCompiler.StackedMethodTable(
        job.world, method_table, SPIRVIntrinsics.method_table
    )
end

# OpenCL compiler jobs use `KernelState` as their kernel state type.
function GPUCompiler.kernel_state_type(::OpenCLCompilerJob)
    return KernelState
end

# filter out OpenCL built-ins
# TODO: eagerly lower these using the translator API
GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
Expand Down
52 changes: 52 additions & 0 deletions src/compiler/exceptions.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# support for device-side exceptions

## exception type

# Host-side exception thrown by `check_exceptions` when an exception flag was found set.
struct KernelException <: Exception
# devices reported with the error; `check_exceptions` fills this with the devices of the
# context whose flag was set
devs::Vector{cl.Device}
end

# Print a one-line description of the error, listing the names of the affected devices
# separated by ", ".
function Base.showerror(io::IO, err::KernelException)
    device_names = join((dev.name for dev in err.devs), ", ")
    print(
        io,
        "KernelException: exception thrown during kernel execution on device(s) ",
        device_names,
    )
end


## exception handling

# Exception flag memory, one entry per context. A `nothing` entry records that
# `create_exceptions!` found no usable memory type when the entry was created, so such
# contexts are skipped by `check_exceptions`.
const exception_infos = Dict{cl.Context, Union{Nothing, cl.AbstractPointerMemory}}()

# Create (or look up) the CPU/GPU exception flag used for error signalling in `ctx`,
# reset it to the "no exception" state, and return it as an `ExceptionInfo` pointer.
#
# The backing memory is a fine-grained SVM buffer when `dev` supports it, otherwise a
# host USM allocation when `dev` supports host access. When neither is available no
# memory is allocated and a null `ExceptionInfo` is returned, which disables signalling.
function create_exceptions!(ctx::cl.Context, dev::cl.Device)
    # looked up once per context; a `nothing` result is cached as well, so the
    # capability queries are not repeated on later calls
    mem = get!(exception_infos, ctx) do
        # query the `dev` argument (not whatever `cl.device()` currently returns) so that
        # the SVM and USM branches both inspect the same device
        if cl.svm_capabilities(dev).fine_grain_buffer
            cl.svm_alloc(sizeof(ExceptionInfo_st); fine_grained=true)
        elseif cl.usm_supported(dev) && cl.usm_capabilities(dev).host.access
            cl.host_alloc(sizeof(ExceptionInfo_st))
        else
            nothing
        end
    end
    mem === nothing && return convert(ExceptionInfo, C_NULL)

    # NOTE(review): the flag is re-initialised on every call, including calls that reuse
    # an existing allocation -- confirm this cannot discard a flag that was set but not
    # yet observed by `check_exceptions`.
    exception_info = convert(ExceptionInfo, mem)
    unsafe_store!(exception_info, ExceptionInfo_st())
    return exception_info
end

# Check the exception flag of every context that has one. The first flag found set is
# reset to its initial state and reported by throwing a `KernelException` carrying that
# context's devices; contexts without flag memory are skipped.
function check_exceptions()
    for (ctx, mem) in exception_infos
        isnothing(mem) && continue

        flag = convert(ExceptionInfo, mem)
        iszero(flag.status) && continue

        # restore the structure before reporting, so the same event is not raised twice
        unsafe_store!(flag, ExceptionInfo_st())

        # throw host-side
        throw(KernelException(ctx.devices))
    end
    return nothing
end
19 changes: 18 additions & 1 deletion src/compiler/execution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ abstract type AbstractKernel{F, TT} end
call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]]
call_args = Union{Expr,Symbol}[x[1] for x in zip(args, to_pass) if x[2]]

# add the kernel state as the first argument
pushfirst!(call_t, KernelState)
pushfirst!(call_args, :(kernel.state))

# replace non-isbits arguments (they should be unused, or compilation would have failed)
for (i,dt) in enumerate(call_t)
if !isbitstype(dt)
Expand All @@ -156,6 +160,16 @@ abstract type AbstractKernel{F, TT} end

quote
indirect_memory = cl.AbstractMemory[]

# add exception info buffer to indirect memory
# XXX: this is too expensive
if kernel.state.exception_info != C_NULL
ctx = cl.context()
if haskey(exception_infos, ctx)
push!(indirect_memory, exception_infos[ctx])
end
end

clcall(kernel.fun, $call_tt, $(call_args...); indirect_memory, call_kwargs...)
end
end
Expand All @@ -167,6 +181,7 @@ end
# Host-side kernel object, parameterised on the type of `f` and on `TT`.
struct HostKernel{F,TT} <: AbstractKernel{F,TT}
# value of type `F` the kernel object was constructed with
f::F
# the `cl.Kernel` handed to `clcall` when the kernel is launched
fun::cl.Kernel
# kernel state passed as the first kernel argument; carries the exception flag pointer
state::KernelState
end


Expand All @@ -191,7 +206,9 @@ function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
kernel = get(_kernel_instances, h, nothing)
if kernel === nothing
# create the kernel state object
kernel = HostKernel{F,tt}(f, fun)
exception_info = create_exceptions!(ctx, dev)
state = KernelState(exception_info)
kernel = HostKernel{F,tt}(f, fun, state)
_kernel_instances[h] = kernel
end
return kernel::HostKernel{F,tt}
Expand Down
51 changes: 50 additions & 1 deletion src/device/runtime.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,56 @@
# reset the runtime cache from global scope, so that any change triggers recompilation
GPUCompiler.reset_runtime()

signal_exception() = return
## exception handling

# Record used to report device-side exceptions to the host.
struct ExceptionInfo_st
    # whether an exception has been encountered (0 -> 1)
    status::Int32

    # a freshly constructed record always starts in the "no exception" state
    function ExceptionInfo_st()
        return new(zero(Int32))
    end
end

# The exception record is passed by reference. To simplify its use, property access is
# overloaded on this pointer type so that `info.status` reads or writes the pointed-to
# field directly.
const ExceptionInfo = Ptr{ExceptionInfo_st}
# Property read on an `ExceptionInfo` pointer: `info.status` loads the `Int32` stored at
# the pointed-to address; any other name falls through to `getfield` on the pointer.
@inline function Base.getproperty(info::ExceptionInfo, sym::Symbol)
    sym === :status && return unsafe_load(convert(Ptr{Int32}, info))
    return getfield(info, sym)
end
# Property write on an `ExceptionInfo` pointer: `info.status = v` stores `v` as an `Int32`
# at the pointed-to address; any other name falls through to `setfield!` on the pointer.
@inline function Base.setproperty!(info::ExceptionInfo, sym::Symbol, value)
    sym === :status || return setfield!(info, sym, value)
    return unsafe_store!(convert(Ptr{Int32}, info), value)
end

## kernel state

# State object handed to every kernel as its first argument.
struct KernelState
    # pointer to the exception record; null when exception signalling is unavailable
    exception_info::ExceptionInfo

    # XXX: Intel's SPIR-V compiler does not support array-valued kernel arguments, and Julia
    #      emits homogeneous structs as arrays. Work around this by including a dummy field.
    dummy::UInt32
end

# convenience constructor that fills in the dummy field
function KernelState(exception_info::ExceptionInfo)
    return KernelState(exception_info, 42)
end

# Accessor for the `KernelState` of the running kernel; the function body is produced by
# `GPUCompiler.kernel_state_value`.
@inline @generated function kernel_state()
    return GPUCompiler.kernel_state_value(KernelState)
end

# Runtime hook that reports an exception to the host by setting the status flag of the
# kernel's exception record. Does nothing when the kernel state carries a null pointer.
function signal_exception()
    exception_info = kernel_state().exception_info

    # no exception record available: nothing to report to
    exception_info == C_NULL && return

    # inform the host: set the flag first, then issue the global memory fence
    exception_info.status = 1
    write_mem_fence(OpenCL.GLOBAL_MEM_FENCE)

    return
end

# Runtime allocation stub: ignores the requested size and always returns a null pointer.
function malloc(sz)
    return C_NULL
end

Expand Down
Loading