diff --git a/lib/cl/cmdqueue.jl b/lib/cl/cmdqueue.jl
index f0d0c14f..31d49bb1 100644
--- a/lib/cl/cmdqueue.jl
+++ b/lib/cl/cmdqueue.jl
@@ -62,6 +62,8 @@ end
 
 function finish(q::CmdQueue)
     clFinish(q)
+    # after waiting on the queue, surface any exception flagged by a kernel
+    OpenCL.check_exceptions()
     return q
 end
 
diff --git a/src/OpenCL.jl b/src/OpenCL.jl
index 4fe5c863..ca838912 100644
--- a/src/OpenCL.jl
+++ b/src/OpenCL.jl
@@ -31,6 +31,7 @@ include("array.jl")
 
 # compiler implementation
 include("compiler/compilation.jl")
+include("compiler/exceptions.jl")
 include("compiler/execution.jl")
 include("compiler/reflection.jl")
 
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index 1e6fc506..ff091b25 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -9,6 +9,9 @@ GPUCompiler.runtime_module(::CompilerJob{<:Any,OpenCLCompilerParams}) = OpenCL
 GPUCompiler.method_table_view(job::OpenCLCompilerJob) =
     GPUCompiler.StackedMethodTable(job.world, method_table, SPIRVIntrinsics.method_table)
 
+# type of the state object kernels receive (the launch code passes it as first argument)
+GPUCompiler.kernel_state_type(job::OpenCLCompilerJob) = KernelState
+
 # filter out OpenCL built-ins
 # TODO: eagerly lower these using the translator API
 GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
diff --git a/src/compiler/exceptions.jl b/src/compiler/exceptions.jl
new file mode 100644
index 00000000..c1311974
--- /dev/null
+++ b/src/compiler/exceptions.jl
@@ -0,0 +1,73 @@
+# support for device-side exceptions
+
+## exception type
+
+"""
+    KernelException(devs)
+
+Host-side exception thrown by `check_exceptions` when a kernel has flagged a device-side
+exception. `devs` lists the devices of the context whose flag was set.
+"""
+struct KernelException <: Exception
+    devs::Vector{cl.Device}
+end
+
+function Base.showerror(io::IO, err::KernelException)
+    print(io, "KernelException: exception thrown during kernel execution on device(s) $(join(map(dev->dev.name, err.devs), ", "))")
+end
+
+
+## exception handling
+
+# per-context exception flag memory; `nothing` is cached for contexts where no suitable
+# allocation is available, so the capability queries only run once per context
+const exception_infos = Dict{cl.Context, Union{Nothing, cl.AbstractPointerMemory}}()
+
+"""
+    create_exceptions!(ctx::cl.Context, dev::cl.Device)
+
+Create (or reuse) the exception flag for `ctx` and return it as an `ExceptionInfo`
+pointer. The backing memory is fine-grained SVM when `dev` supports it, host-accessible
+USM otherwise; when neither is available a null pointer is returned. The flag is reset
+to its initial state on every call.
+"""
+function create_exceptions!(ctx::cl.Context, dev::cl.Device)
+    mem = get!(exception_infos, ctx) do
+        # query the device that was passed in, consistent with the USM branch below
+        if cl.svm_capabilities(dev).fine_grain_buffer
+            cl.svm_alloc(sizeof(ExceptionInfo_st); fine_grained=true)
+        elseif cl.usm_supported(dev) && cl.usm_capabilities(dev).host.access
+            cl.host_alloc(sizeof(ExceptionInfo_st))
+        else
+            nothing
+        end
+    end
+    if mem === nothing
+        return convert(ExceptionInfo, C_NULL)
+    end
+
+    exception_info = convert(ExceptionInfo, mem)
+    unsafe_store!(exception_info, ExceptionInfo_st())
+    return exception_info
+end
+
+"""
+    check_exceptions()
+
+Inspect the exception flag of every known context. On the first flag that is set, reset
+it and throw a `KernelException` for that context's devices. Invoked from `finish`.
+"""
+function check_exceptions()
+    for (ctx, mem) in exception_infos
+        mem === nothing && continue
+        exception_info = convert(ExceptionInfo, mem)
+        if exception_info.status != 0
+            # restore the structure
+            unsafe_store!(exception_info, ExceptionInfo_st())
+
+            # throw host-side
+            throw(KernelException(ctx.devices))
+        end
+    end
+    return
+end
diff --git a/src/compiler/execution.jl b/src/compiler/execution.jl
index 881ea906..519466af 100644
--- a/src/compiler/execution.jl
+++ b/src/compiler/execution.jl
@@ -143,6 +143,10 @@ abstract type AbstractKernel{F, TT} end
     call_t = Type[x[1] for x in zip(sig.parameters, to_pass) if x[2]]
     call_args = Union{Expr,Symbol}[x[1] for x in zip(args, to_pass) if x[2]]
 
+    # add the kernel state as the first argument
+    pushfirst!(call_t, KernelState)
+    pushfirst!(call_args, :(kernel.state))
+
     # replace non-isbits arguments (they should be unused, or compilation would have failed)
     for (i,dt) in enumerate(call_t)
         if !isbitstype(dt)
@@ -156,6 +160,16 @@ abstract type AbstractKernel{F, TT} end
 
     quote
         indirect_memory = cl.AbstractMemory[]
+
+        # add exception info buffer to indirect memory
+        # XXX: this is too expensive
+        if kernel.state.exception_info != C_NULL
+            # look the buffer up once; the current context may have no entry, or a
+            # `nothing` entry, neither of which may be pushed into `indirect_memory`
+            exception_mem = get(exception_infos, cl.context(), nothing)
+            exception_mem === nothing || push!(indirect_memory, exception_mem)
+        end
+
         clcall(kernel.fun, $call_tt, $(call_args...); indirect_memory, call_kwargs...)
     end
 end
@@ -167,6 +181,7 @@ end
 struct HostKernel{F,TT} <: AbstractKernel{F,TT}
     f::F
     fun::cl.Kernel
+    state::KernelState
 end
 
 
@@ -191,7 +206,9 @@ function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
         kernel = get(_kernel_instances, h, nothing)
         if kernel === nothing
             # create the kernel state object
-            kernel = HostKernel{F,tt}(f, fun)
+            exception_info = create_exceptions!(ctx, dev)
+            state = KernelState(exception_info)
+            kernel = HostKernel{F,tt}(f, fun, state)
             _kernel_instances[h] = kernel
         end
         return kernel::HostKernel{F,tt}
diff --git a/src/device/runtime.jl b/src/device/runtime.jl
index b2a78a29..dcee348a 100644
--- a/src/device/runtime.jl
+++ b/src/device/runtime.jl
@@ -1,7 +1,60 @@
 # reset the runtime cache from global scope, so that any change triggers recompilation
 GPUCompiler.reset_runtime()
 
-signal_exception() = return
+## exception handling
+
+# flag written by kernels (`signal_exception`) and polled by the host
+struct ExceptionInfo_st
+    # whether an exception has been encountered (0 -> 1)
+    status::Int32
+
+    ExceptionInfo_st() = new(0)
+end
+
+# to simplify use of this struct, which is passed by-reference, use property overloading
+const ExceptionInfo = Ptr{ExceptionInfo_st}
+@inline function Base.getproperty(info::ExceptionInfo, sym::Symbol)
+    if sym === :status
+        # `status` is the struct's only field, so it is read through the base pointer
+        unsafe_load(convert(Ptr{Int32}, info))
+    else
+        getfield(info, sym)
+    end
+end
+@inline function Base.setproperty!(info::ExceptionInfo, sym::Symbol, value)
+    if sym === :status
+        unsafe_store!(convert(Ptr{Int32}, info), value)
+    else
+        setfield!(info, sym, value)
+    end
+end
+
+## kernel state
+
+struct KernelState
+    exception_info::ExceptionInfo
+
+    # XXX: Intel's SPIR-V compiler does not support array-valued kernel arguments, and Julia
+    #      emits homogeneous structs as arrays. Work around this by including a dummy field.
+    dummy::UInt32
+end
+# convenience constructor that fills in the dummy field
+KernelState(exception_info::ExceptionInfo) = KernelState(exception_info, 42)
+
+@inline @generated kernel_state() = GPUCompiler.kernel_state_value(KernelState)
+
+# raise the exception flag in the kernel state; a null `exception_info` is a no-op
+function signal_exception()
+    info = kernel_state().exception_info
+
+    # inform the host
+    if info != C_NULL
+        info.status = 1
+        write_mem_fence(OpenCL.GLOBAL_MEM_FENCE)
+    end
+
+    return
+end
 
 malloc(sz) = C_NULL
 