Description
The following is an MWE of broadcast overloading that worked before #393.
struct ArrayFuse{AT,T,P} <: AbstractArray{T,1}
    visible::AT
    hidden::AT
    p::P
end
ArrayFuse(visible::AT, hidden::AT, p) where {AT} = ArrayFuse{AT,eltype(visible),typeof(p)}(visible, hidden, p)
# fused update when the source is a lazy broadcast expression
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end
# same fused update when the source is a plain array
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::AbstractArray) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end
# zero-dimensional (scalar-style) broadcast sources, avoiding an ambiguity with Base's method
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted{F1,Axes,F,Args}) where {AT,T,P,F1<:Base.Broadcast.AbstractArrayStyle{0},Axes,F,Args<:Tuple}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end
# scalar indexing fallbacks: not recommended, but good to have
@inline function Base.getindex(af::ArrayFuse, index)
    return af.visible[index]
end
@inline function Base.setindex!(af::ArrayFuse, value, index)
    af.visible[index] = af.p[1] * af.visible[index] + af.p[2] * value
    af.hidden[index] = muladd(af.p[3], af.visible[index], af.hidden[index])
end
@inline Base.size(af::ArrayFuse) = size(af.visible)
@inline Base.axes(af::ArrayFuse) = axes(af.visible)
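# For reference, assigning into an ArrayFuse(visible, hidden, (p1, p2, p3)) via `dest .= src`
# is meant to fuse the two updates performed inside copyto! above. A minimal standalone CPU
# sketch of that semantics (the names cv, ch, ca, cb, cdt, csrc and the 4x4 size are made up
# purely for illustration):
let cv = ones(4, 4), ch = ones(4, 4), ca = fill(0.5, 4, 4), cb = fill(0.25, 4, 4), cdt = 0.01, csrc = ones(4, 4)
    @. cv = ca * cv + cdt * csrc   # visible = p[1] * visible + p[2] * src
    @. ch = ch + cb * cv           # hidden  = hidden + p[3] * visible
end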
using GPUArrays, CUDA
CUDA.allowscalar(false)
N = 256
# Define the initial condition as normal arrays
u0 = zeros(N, N, 3)
u0 .= 1.0
gu0 = CuArray(Float32.(u0))
tmp, u, a, b = [copy(gu0) for i in 1:4]
dt = 0.01
du = ArrayFuse(tmp, u, (a, dt, b))
du .= u
#=
ERROR: This object is not a GPU array
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:35
[2] backend(#unused#::Type)
@ GPUArrays C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\device\execution.jl:15
[3] backend(x::ArrayFuse{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float32, Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float64, CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}})
@ GPUArrays C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\device\execution.jl:16
[4] _copyto!
@ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:73 [inlined]
[5] materialize!
@ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:51 [inlined]
[6] materialize!(dest::ArrayFuse{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float32, Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float64, CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(identity), Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}})
@ Base.Broadcast .\broadcast.jl:868
[7] top-level scope
@ c:\Users\accou\OneDrive\Computer\Desktop\test.jl:53
=#
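# The trace shows where dispatch goes wrong: the combined broadcast style is CUDA.CuArrayStyle{3}
# (frame [6] above), so GPUArrays' generic materialize!/_copyto! runs instead of the copyto!
# overloads defined here, and it calls GPUArrays.backend on the ArrayFuse destination, which is
# not a GPU array. A quick illustrative check of the style broadcast picks for this pair:
Base.Broadcast.combine_styles(du, u)   # expected to return CUDA.CuArrayStyle{3}()
# Giving ArrayFuse its own BroadcastStyle is one attempt to steer materialize! back to the
# overloads above: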
Base.BroadcastStyle(::Type{<:ArrayFuse}) = Broadcast.ArrayStyle{ArrayFuse}()
# catch GPU-styled broadcast sources explicitly
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted{<:GPUArrays.AbstractGPUArrayStyle}) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end
du .= u
#=
ERROR: CUDA error: an illegal memory access was encountered (code 700, ERROR_ILLEGAL_ADDRESS)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\error.jl:91
[2] isdone
@ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\stream.jl:109 [inlined]
[3] nonblocking_synchronize
@ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\stream.jl:139 [inlined]
[4] nonblocking_synchronize
@ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\context.jl:325 [inlined]
[5] device_synchronize()
@ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\context.jl:319
[6] CuModule(data::Vector{UInt8}, options::Dict{CUDA.CUjit_option_enum, Any})
@ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\module.jl:41
[7] CuModule
@ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\module.jl:23 [inlined]
[8] cufunction_link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry, :external_gvars), Tuple{Vector{UInt8}, String, Vector{String}}})
@ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:451
[9] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler C:\Users\accou\.julia\packages\GPUCompiler\I9fZc\src\cache.jl:95
[10] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceArray{Float32, 3, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(*), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}}}}}, Int64}}; name::Nothing,
kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:297
[11] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceArray{Float32, 3, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(*), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}}}}}, Int64}})
@ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:291
[12] macro expansion
@ C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:102 [inlined]
[13] #launch_heuristic#282
@ C:\Users\accou\.julia\packages\CUDA\5jdFl\src\gpuarrays.jl:17 [inlined]
[14] _copyto!
@ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:73 [inlined]
[15] materialize!
@ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:51 [inlined]
[16] materialize!
@ .\broadcast.jl:868 [inlined]
[17] copyto!
@ .\REPL[2]:3 [inlined]
[18] materialize!
@ .\broadcast.jl:871 [inlined]
[19] materialize!(dest::ArrayFuse{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float32, Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float64, CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(identity), Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}})
@ Base.Broadcast .\broadcast.jl:868
[20] top-level scope
@ REPL[3]:1
=#
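# For comparison, writing the two updates directly on the CuArrays, bypassing the ArrayFuse
# wrapper, is expected to broadcast without error in a fresh session (an illustrative sketch
# reusing tmp, u, a, b, dt from above); that points at the wrapper's interaction with the GPU
# broadcast machinery rather than at the updates themselves:
@. tmp = a * tmp + dt * u   # visible = p[1] * visible + p[2] * src
@. u = u + b * tmp          # hidden  = hidden + p[3] * visible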