Skip to content

Broadcast overloading regression #404

Closed
SciML/OrdinaryDiffEq.jl
#1663
@ChrisRackauckas

Description

@ChrisRackauckas

The following is a minimal working example (MWE) of broadcast overloading which worked before #393.

# Fuses two update rules into one destination: assigning into an `ArrayFuse`
# updates `visible` (as p[1]*visible + p[2]*src) and then accumulates into
# `hidden` (as hidden + p[3]*visible) — see the `copyto!`/`setindex!` methods
# below.
# NOTE(review): declared as a 1-D AbstractArray even though the MWE below
# stores 3-D CuArrays in `visible`/`hidden`; this dimension mismatch is part
# of the reported regression, not an accident to "fix" here.
struct ArrayFuse{AT,T,P} <: AbstractArray{T,1}
    visible::AT
    hidden::AT
    p::P
end

# Convenience constructor: derive the element-type parameter from `visible`
# and the coefficient-type parameter from `p`.
function ArrayFuse(visible::AT, hidden::AT, p) where {AT}
    return ArrayFuse{AT,eltype(visible),typeof(p)}(visible, hidden, p)
end

# Intercept broadcast materialization into an `ArrayFuse`: instead of plainly
# storing `src`, fold it into `visible`, then accumulate into `hidden`.
# NOTE(review): `@.` dots *every* call, including indexing, so `af.p[1]`
# becomes `getindex.(af.p, 1)` — confirm that is the intended semantics when
# `p` is a tuple (escape with `$(af.p[1])` if a plain scalar lookup is meant).
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end

# Same fused update for plain-array sources (non-broadcast `copyto!` path).
# NOTE(review): `@.` also dots the `af.p[i]` indexing — see the note on the
# Broadcasted method; verify intended semantics for tuple `p`.
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::AbstractArray) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end

# NOTE(review): this is an exact duplicate of the Broadcasted method defined
# above — it silently redefines (overwrites) that method with an identical
# body. Harmless at runtime, but one of the two definitions should be removed.
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end

# More specific method for zero-dimensional broadcast styles (scalar-like
# Broadcasted sources), resolving the ambiguity with Base's own
# `copyto!(::AbstractArray, ::Broadcasted{<:AbstractArrayStyle{0}})` method.
# Body is identical to the generic Broadcasted method above.
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted{F1,Axes,F,Args}) where {AT,T,P,F1<:Base.Broadcast.AbstractArrayStyle{0},Axes,F,Args<:Tuple}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end

# Scalar reads simply delegate to the visible array. Not recommended as a hot
# path (scalar indexing is disabled for GPU arrays below), but good to have
# for display and debugging.
@inline Base.getindex(af::ArrayFuse, index) = af.visible[index]

# Scalar write: apply the same fused update rule element-wise.
@inline function Base.setindex!(af::ArrayFuse, value, index)
    # Order matters: `visible[index]` must be refreshed first, because the
    # `hidden` update on the next line reads the *new* visible value.
    af.visible[index] = af.p[1] * af.visible[index] + af.p[2] * value
    af.hidden[index] = muladd(af.p[3], af.visible[index], af.hidden[index])
end

# `size` for an AbstractArray must return a Dims tuple; the original returned
# a bare `length(af.visible)::Int`, which violates the AbstractArray interface
# and breaks generic code that splats or indexes the size.
@inline Base.size(af::ArrayFuse) = (length(af.visible),)
# For an `AbstractArray{T,1}` subtype, `axes` must return a 1-tuple of ranges;
# the original returned `axes(af.visible)`, leaking the parent's (possibly
# 3-D) axes and contradicting the declared 1-D shape. Expose the linear
# extent instead, consistent with `size`.
@inline Base.axes(af::ArrayFuse) = (Base.OneTo(length(af.visible)),)

using GPUArrays, CUDA
CUDA.allowscalar(false)  # fail loudly on scalar indexing of GPU arrays
N = 256
# Define the initial condition as normal arrays
u0 = zeros(N, N, 3)
u0 .= 1.0
gu0 = CuArray(Float32.(u0))  # move to the GPU as Float32
# Four independent GPU buffers: working array, state, and two coefficient arrays
tmp, u, a, b = [copy(gu0) for i in 1:4]
dt = 0.01

# Fuse `tmp` (visible) and `u` (hidden) with coefficients (a, dt, b);
# the broadcast assignment below triggers materialize! -> copyto!(du, bc),
# which errors as shown in the stack trace that follows.
du = ArrayFuse(tmp, u, (a, dt, b))
du .= u

#=
ERROR: This object is not a GPU array
Stacktrace:
 [1] error(s::String)
   @ Base .\error.jl:35
 [2] backend(#unused#::Type)
   @ GPUArrays C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\device\execution.jl:15
 [3] backend(x::ArrayFuse{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float32, Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float64, CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}})
   @ GPUArrays C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\device\execution.jl:16
 [4] _copyto!
   @ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:73 [inlined]
 [5] materialize!
   @ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:51 [inlined]
 [6] materialize!(dest::ArrayFuse{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float32, Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float64, CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(identity), Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}})
   @ Base.Broadcast .\broadcast.jl:868
 [7] top-level scope
   @ c:\Users\accou\OneDrive\Computer\Desktop\test.jl:53
=#

# Give ArrayFuse its own broadcast style so destination-style dispatch routes
# broadcasts into the custom `copyto!` methods above.
function Base.BroadcastStyle(::Type{<:ArrayFuse})
    return Broadcast.ArrayStyle{ArrayFuse}()
end
# Attempted workaround from the issue: intercept GPU-styled broadcasts
# directly so GPUArrays' generic `_copyto!` (which calls `backend(dest)`)
# is bypassed. As the trace below shows, this still fails at kernel launch.
# NOTE(review): `@.` also dots the `af.p[i]` indexing — same caveat as the
# CPU methods above.
@inline function Base.copyto!(af::ArrayFuse{AT,T,P}, src::Base.Broadcast.Broadcasted{<:GPUArrays.AbstractGPUArrayStyle}) where {AT,T,P}
    @. af.visible = af.p[1] * af.visible + af.p[2] * src
    @. af.hidden = af.hidden + af.p[3] * af.visible
end

# Re-run the failing broadcast with the workaround in place
# (still errors — see the CUDA stack trace below).
du .= u

#=
ERROR: CUDA error: an illegal memory access was encountered (code 700, ERROR_ILLEGAL_ADDRESS)
Stacktrace:
  [1] throw_api_error(res::CUDA.cudaError_enum)
    @ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\error.jl:91
  [2] isdone
    @ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\stream.jl:109 [inlined]
  [3] nonblocking_synchronize
    @ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\stream.jl:139 [inlined]
  [4] nonblocking_synchronize
    @ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\context.jl:325 [inlined]
  [5] device_synchronize()
    @ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\context.jl:319
  [6] CuModule(data::Vector{UInt8}, options::Dict{CUDA.CUjit_option_enum, Any})
    @ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\module.jl:41
  [7] CuModule
    @ C:\Users\accou\.julia\packages\CUDA\5jdFl\lib\cudadrv\module.jl:23 [inlined]
  [8] cufunction_link(job::GPUCompiler.CompilerJob, compiled::NamedTuple{(:image, :entry, :external_gvars), Tuple{Vector{UInt8}, String, Vector{String}}})
    @ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:451
  [9] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
    @ GPUCompiler C:\Users\accou\.julia\packages\GPUCompiler\I9fZc\src\cache.jl:95
 [10] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceArray{Float32, 3, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(*), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}}}}}, Int64}}; name::Nothing,
 kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:297
 [11] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CuDeviceArray{Float32, 3, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(*), Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}, Base.Broadcast.Extruded{CuDeviceArray{Float32, 3, 1}, Tuple{Bool, Bool, Bool}, Tuple{Int64, Int64, Int64}}}}}}, Int64}})
    @ CUDA C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:291
 [12] macro expansion
    @ C:\Users\accou\.julia\packages\CUDA\5jdFl\src\compiler\execution.jl:102 [inlined]
 [13] #launch_heuristic#282
    @ C:\Users\accou\.julia\packages\CUDA\5jdFl\src\gpuarrays.jl:17 [inlined]
 [14] _copyto!
    @ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:73 [inlined]
 [15] materialize!
    @ C:\Users\accou\.julia\packages\GPUArrays\VNhDf\src\host\broadcast.jl:51 [inlined]
 [16] materialize!
    @ .\broadcast.jl:868 [inlined]
 [17] copyto!
    @ .\REPL[2]:3 [inlined]
 [18] materialize!
    @ .\broadcast.jl:871 [inlined]
 [19] materialize!(dest::ArrayFuse{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float32, Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}, Float64, CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{3}, Nothing, typeof(identity), Tuple{CuArray{Float32, 3, CUDA.Mem.DeviceBuffer}}})
    @ Base.Broadcast .\broadcast.jl:868
 [20] top-level scope
    @ REPL[3]:1
=#

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions